% O'Shea (2017): systematic review of event-based Internet biosurveillance systems.
@article{OShea2017,
  abstract = {Background Internet access and usage has changed how people seek and report health information. Meanwhile, infectious diseases continue to threaten humanity. The analysis of Big Data, or vast digital data, presents an opportunity to improve disease surveillance and epidemic intelligence. Epidemic intelligence contains two components: indicator based and event-based. A relatively new surveillance type has emerged called event-based Internet biosurveillance systems. These systems use information on events impacting health from Internet sources, such as social media or news aggregates. These systems circumvent the limitations of traditional reporting systems by being inexpensive, transparent, and flexible. Yet, innovations and the functionality of these systems can change rapidly. Aim To update the current state of knowledge on event-based Internet biosurveillance systems by identifying all systems, including current functionality, with hopes to aid decision makers with whether to incorporate new methods into comprehensive programmes of surveillance. Methods A systematic review was performed through PubMed, Scopus, and Google Scholar databases, while also including grey literature and other publication types. Results 50 event-based Internet systems were identified, including an extraction of 15 attributes for each system, described in 99 articles. Each system uses different innovative technology and data sources to gather data, process, and disseminate data to detect infectious disease outbreaks. Conclusions The review emphasises the importance of using both formal and informal sources for timely and accurate infectious disease outbreak surveillance, cataloguing all event-based Internet biosurveillance systems. By doing so, future researchers will be able to use this review as a library for referencing systems, with hopes of learning, building, and expanding Internet-based surveillance systems. Event-based Internet biosurveillance should act as an extension of traditional systems, to be utilised as an additional, supplemental data source to have a more comprehensive estimate of disease burden.},
  author = {O'Shea, Jesse},
  doi = {10.1016/j.ijmedinf.2017.01.019},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/O'Shea - 2017 - Digital disease detection A systematic review of event-based internet biosurveillance systems(2).pdf:pdf},
  issn = {18728243},
  journal = {International Journal of Medical Informatics},
  keywords = {Biosurveillance,Disease surveillance,Public health},
  month = may,
  pages = {15--22},
  publisher = {Elsevier Ireland Ltd},
  title = {{Digital disease detection: A systematic review of event-based internet biosurveillance systems}},
  volume = {101},
  year = {2017}
}

% Brownstein et al. (2008): the HealthMap project (PLoS Medicine).
@article{Brownstein2008,
  abstract = {John Brownstein and colleagues discuss HealthMap, an automated real-time system that monitors and disseminates online information about emerging infectious diseases.},
  author = {Brownstein, John S. and Freifeld, Clark C. and Reis, Ben Y. and Mandl, Kenneth D.},
  doi = {10.1371/journal.pmed.0050151},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Brownstein et al. - 2008 - Surveillance Sans Fronti{\`{e}}res Internet-Based Emerging Infectious Disease Intelligence and the HealthMap Projec.pdf:pdf;:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Brownstein et al. 
- 2008 - Surveillance sans frontiers Internet-based emerging infectious disease intelligence and the HealthMap project.pdf:pdf},
  isbn = {1549-1676},
  issn = {15491277},
  journal = {PLoS Medicine},
  keywords = {webcrawl},
  mendeley-tags = {webcrawl},
  month = jul,
  number = {7},
  pages = {e151},
  pmid = {18613747},
  publisher = {Public Library of Science},
  title = {{Surveillance Sans Fronti{\`{e}}res: Internet-Based Emerging Infectious Disease Intelligence and the HealthMap Project}},
  url = {http://dx.plos.org/10.1371/journal.pmed.0050151},
  volume = {5},
  year = {2008}
}

% Khoury and Ioannidis (2014): "Big data meets public health" (Science 346).
% Not an arXiv preprint, so the entry carries no eprint fields; url holds a single address.
@article{Khoury2014,
  abstract = {In 1854, as cholera swept through London, John Snow, the father of modern epidemiology, painstakingly recorded the locations of affected homes. After long, laborious work, he implicated the Broad Street water pump as the source of the outbreak, even without knowing that a Vibrio organism caused cholera. “Today, Snow might have crunched Global Positioning System information and disease prevalence data, solving the problem within hours” (1). That is the potential impact of “Big Data” on the public's health. But the promise of Big Data is also accompanied by claims that “the scientific method itself is becoming obsolete” (2), as next-generation computers, such as IBM's Watson (3), sift through the digital world to provide predictive models based on massive information. Separating the true signal from the gigantic amount of noise is neither easy nor straightforward, but it is a challenge that must be tackled if information is ever to be translated into societal well-being.},
  author = {Khoury, Muin J. and Ioannidis, John P. A.},
  doi = {10.1126/science.aaa2709},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Khoury, Ioannidis - 2014 - Medicine. Big data meets public health(2).pdf:pdf;:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Khoury, Ioannidis - 2014 - Medicine. Big data meets public health.pdf:pdf},
  issn = {1095-9203},
  journal = {Science},
  keywords = {Databases,Factual,Humans,Public Health,big data},
  mendeley-tags = {big data},
  month = nov,
  number = {6213},
  pages = {1054--1055},
  pmid = {25430753},
  pmcid = {PMC4684636},
  publisher = {NIH Public Access},
  title = {{Medicine. Big data meets public health}},
  url = {http://www.sciencemag.org/content/346/6213/1054},
  volume = {346},
  year = {2014}
}

% Schneeweiss (2014): "Learning from Big Health Care Data" (NEJM 370).
@article{Schneeweiss2014,
  abstract = {In any field, improving performance and accountability depends on having a shared goal that unites the interests and activities of all stakeholders. In health care, however, stakeholders have myriad, often conflicting goals, including access to services, profitability, high quality, cost containment, safety, convenience, patient-centeredness, and satisfaction. Lack of clarity about goals has led to divergent approaches, gaming of the system, and slow progress in performance improvement. Achieving high value for patients must become the overarching goal of health care delivery, with value defined as the health outcomes achieved per dollar spent.(1) This goal is what matters for patients and unites . . 
.},
  author = {Schneeweiss, Sebastian},
  doi = {10.1056/NEJMp1401111},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Schneeweiss - 2014 - Learning from Big Health Care Data.pdf:pdf;:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Schneeweiss - 2014 - Learning from Big Health Care Data(2).pdf:pdf},
  isbn = {0028-4793},
  issn = {15334406},
  journal = {The New England Journal of Medicine},
  keywords = {big data},
  mendeley-tags = {big data},
  month = jun,
  number = {23},
  pages = {2161--2163},
  pmid = {24897079},
  title = {{Learning from Big Health Care Data}},
  url = {http://www.nejm.org/doi/abs/10.1056/NEJMp1401111},
  volume = {370},
  year = {2014}
}

% Pfeiffer and Stevens (2015): spatial and temporal epidemiological analysis
% in the Big Data era (Preventive Veterinary Medicine 122).
@article{Pfeiffer2015,
  abstract = {Concurrent with global economic development in the last 50 years, the opportunities for the spread of existing diseases and emergence of new infectious pathogens, have increased substantially. The activities associated with the enormously intensified global connectivity have resulted in large amounts of data being generated, which in turn provides opportunities for generating knowledge that will allow more effective management of animal and human health risks. This so-called Big Data has, more recently, been accompanied by the Internet of Things which highlights the increasing presence of a wide range of sensors, interconnected via the Internet. Analysis of this data needs to exploit its complexity, accommodate variation in data quality and should take advantage of its spatial and temporal dimensions, where available. Apart from the development of hardware technologies and networking/communication infrastructure, it is necessary to develop appropriate data management tools that make this data accessible for analysis. 
This includes relational databases, geographical information systems and most recently, cloud-based data storage such as Hadoop distributed file systems. While the development in analytical methodologies has not quite caught up with the data deluge, important advances have been made in a number of areas, including spatial and temporal data analysis where the spectrum of analytical methods ranges from visualisation and exploratory analysis, to modelling. While there used to be a primary focus on statistical science in terms of methodological development for data analysis, the newly emerged discipline of data science is a reflection of the challenges presented by the need to integrate diverse data sources and exploit them using novel data- and knowledge-driven modelling methods while simultaneously recognising the value of quantitative as well as qualitative analytical approaches. Machine learning regression methods, which are more robust and can handle large datasets faster than classical regression approaches, are now also used to analyse spatial and spatio-temporal data. Multi-criteria decision analysis methods have gained greater acceptance, due in part, to the need to increasingly combine data from diverse sources including published scientific information and expert opinion in an attempt to fill important knowledge gaps. The opportunities for more effective prevention, detection and control of animal health threats arising from these developments are immense, but not without risks given the different types, and much higher frequency, of biases associated with these data.},
  author = {Pfeiffer, Dirk U. and Stevens, Kim B.},
  doi = {10.1016/j.prevetmed.2015.05.012},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Pfeiffer, Stevens - 2015 - Spatial and temporal epidemiological analysis in the Big Data era.pdf:pdf},
  isbn = {0167-5877},
  issn = {01675877},
  journal = {Preventive Veterinary Medicine},
  keywords = {Data science,Exploratory analysis,Internet of Things,Modelling,Multi-criteria decision analysis,Spatial analysis,Visualisation,big data,spatial},
  mendeley-tags = {big data,spatial},
  month = nov,
  number = {1--2},
  pages = {213--220},
  pmid = {26092722},
  publisher = {Elsevier B.V.},
  title = {{Spatial and temporal epidemiological analysis in the Big Data era}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0167587715002111},
  volume = {122},
  year = {2015}
}

% Althouse et al. (2015): framework for integrating novel data streams (NDS)
% into public health surveillance (EPJ Data Science 4).
@article{Althouse2015,
  abstract = {Novel data streams (NDS), such as web search data or social media updates, hold promise for enhancing the capabilities of public health surveillance. In this paper, we outline a conceptual framework for integrating NDS into current public health surveillance. Our approach focuses on two key questions: What are the opportunities for using NDS and what are the minimal tests of validity and utility that must be applied when using NDS? Identifying these opportunities will necessitate the involvement of public health authorities and an appreciation of the diversity of objectives and scales across agencies at different levels (local, state, national, international). We present the case that clearly articulating surveillance objectives and systematically evaluating NDS and comparing the performance of NDS to existing surveillance data and alternative NDS data is critical and has not sufficiently been addressed in many applications of NDS currently in the literature.},
  author = {Althouse, Benjamin M. and Scarpino, Samuel V. and Meyers, Lauren Ancel and Ayers, John W. 
and Bargsten, Marisa and Baumbach, Joan and Brownstein, John S. and Castro, Lauren and Clapham, Hannah and Cummings, Derek A. T. and {Del Valle}, Sara and Eubank, Stephen and Fairchild, Geoffrey and Finelli, Lyn and Generous, Nicholas and George, Dylan and Harper, David R. and H{\'{e}}bert-Dufresne, Laurent and Johansson, Michael A. and Konty, Kevin and Lipsitch, Marc and Milinovich, Gabriel and Miller, Joseph D. and Nsoesie, Elaine O. and Olson, Donald R. and Paul, Michael and Polgreen, Philip M. and Priedhorsky, Reid and Read, Jonathan M. and Rodr{\'{i}}guez-Barraquer, Isabel and Smith, Derek J. and Stefansen, Christian and Swerdlow, David L. and Thompson, Deborah and Vespignani, Alessandro and Wesolowski, Amy},
  doi = {10.1140/epjds/s13688-015-0054-0},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Althouse et al. - 2015 - Enhancing disease surveillance with novel data streams challenges and opportunities.pdf:pdf},
  issn = {21931127},
  journal = {EPJ Data Science},
  keywords = {ddds,digital surveillance,disease surveillance,hybrid systems,novel data sources,novel data streams},
  mendeley-tags = {ddds,hybrid systems,novel data sources},
  number = {1},
  pages = {1--8},
  title = {{Enhancing disease surveillance with novel data streams: challenges and opportunities}},
  url = {http://dx.doi.org/10.1140/epjds/s13688-015-0054-0},
  volume = {4},
  year = {2015}
}

% Kumar, Asamoah and Sharda (2015): AMCIS 2015 paper on disease surveillance
% from social media using big data analytics.
@inproceedings{Kumar2015,
  abstract = {Rapid growth of the Internet has paved the way for millions of people across the globe to access social media platforms such as Facebook and Twitter. These social media platforms enable people to share information instantaneously. The large volume of information shared on these platforms can be leveraged to identify outbreaks of various epidemics. This will help health professionals to provide timely intervention, which in return could help save lives and millions of dollars. 
Analysis of information shared on social media is complicated due to its sheer volume, varied formats and velocity of collection. We have addressed this potential problem by making use of a big data analytics platform capable of handling large quantities of streaming data. In this paper we demonstrate how data from social media can be effectively used in the surveillance of disease conditions.},
  author = {Kumar, A T K and Asamoah, D and Sharda, R},
  booktitle = {2015 Americas Conference on Information Systems, AMCIS 2015},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kumar, Asamoah, Sharda - 2015 - Can social media support public health Demonstrating disease surveillance using big data analytics.pdf:pdf},
  isbn = {9780996683104},
  keywords = {social media},
  mendeley-tags = {social media},
  title = {{Can social media support public health? Demonstrating disease surveillance using big data analytics}},
  year = {2015}
}

% Hoinville et al. (2013): proposed terms and concepts for describing and
% evaluating animal-health surveillance systems (workshop outcome).
@article{Hoinville2013,
  abstract = {The information provided by animal-health surveillance helps to reduce the impact of animal diseases. The widespread movement of animals and their products around the world results in an increasing risk that disease will spread. There is, therefore, a need for exchange between countries of comparable information about disease incidence; the exchange must be based on a common understanding of surveillance approaches and how surveillance systems are designed and implemented. Establishing agreed-upon definitions of surveillance terms would be a first step in achieving this standardisation, and will enhance transparency and confidence. To this end, a workshop was held with the aim of agreeing upon key terms and concepts for animal-health surveillance. In this paper, we describe the methods used at the workshop and summarise the discussions. 
A complete list of all the proposed definitions including lists of characteristics that can be used to describe surveillance activities and attributes for evaluation of surveillance is available in the workshop report (available at http://www.defra.gov.uk/ahvla-en/disease-control/surveillance/icahs-workshop/). Some important issues were highlighted during these discussions; of particular note was the importance of economic efficiency as an evaluation attribute. Some remaining inconsistencies in the proposed use of terms are highlighted (including the definition of 'risk-based surveillance' and the use of the term 'event-based surveillance').},
  author = {Hoinville, L. J. and Alban, L. and Drewe, J. A. and Gibbens, J. C. and Gustafson, L. and H{\"{a}}sler, B. and Saegerman, C. and Salman, M. and St{\"{a}}rk, K. D. C.},
  doi = {10.1016/j.prevetmed.2013.06.006},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Hoinville et al. - 2013 - Proposed terms and concepts for describing and evaluating animal-health surveillance systems.pdf:pdf},
  issn = {01675877},
  journal = {Preventive Veterinary Medicine},
  keywords = {Animal-health,Definitions,Description,Evaluation,Standardisation,Surveillance,Terms},
  month = oct,
  number = {1--2},
  pages = {1--12},
  pmid = {23906392},
  title = {{Proposed terms and concepts for describing and evaluating animal-health surveillance systems}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0167587713002055},
  volume = {112},
  year = {2013}
}

% Stevens and Pfeiffer (2015): review of sources of spatial animal and human
% health data (Spatial and Spatio-temporal Epidemiology 13).
@article{Stevens2015,
  abstract = {{\textcopyright} 2015 Elsevier Ltd. During the last 30 years it has become commonplace for epidemiological studies to collect locational attributes of disease data. 
Although this advancement was driven largely by the introduction of handheld global positioning systems (GPS), and more recently, smartphones and tablets with built-in GPS, the collection of georeferenced disease data has moved beyond the use of handheld GPS devices and there now exist numerous sources of crowdsourced georeferenced disease data such as that available from georeferencing of Google search queries or Twitter messages. In addition, cartography has moved beyond the realm of professionals to crowdsourced mapping projects that play a crucial role in disease control and surveillance of outbreaks such as the 2014 West Africa Ebola epidemic. This paper provides a comprehensive review of a range of innovative sources of spatial animal and human health data including data warehouses, mHealth, Google Earth, volunteered geographic information and mining of internet-based big data sources such as Google and Twitter. We discuss the advantages, limitations and applications of each, and highlight studies where they have been used effectively.},
  author = {Stevens, Kim B. and Pfeiffer, Dirk U.},
  doi = {10.1016/j.sste.2015.04.003},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Stevens, Pfeiffer - 2015 - Sources of spatial animal and human health data Casting the net wide to deal more effectively with increasing.pdf:pdf},
  issn = {18775845},
  journal = {Spatial and Spatio-temporal Epidemiology},
  keywords = {Big data,Data warehouse,Google Earth,MHealth,Spatial data,Volunteered geographic information,spatial},
  mendeley-tags = {spatial},
  month = apr,
  pages = {15--29},
  pmid = {26046634},
  publisher = {Elsevier Ltd},
  title = {{Sources of spatial animal and human health data: Casting the net wide to deal more effectively with increasingly complex disease problems}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S1877584515000179},
  volume = {13},
  year = {2015}
}

% Salath{\'e} et al. (2012): "Digital epidemiology" (PLoS Computational Biology 8).
@article{Salathe2012,
  abstract = {Mobile, social, real-time: the ongoing revolution in the way people communicate has given rise to a new kind of epidemiology. Digital data sources, when harnessed appropriately, can provide local and timely information about disease and health dynamics in populations around the world. The rapid, unprecedented increase in the availability of relevant data from various digital sources creates considerable technical and computational challenges.},
  author = {Salath{\'{e}}, Marcel and Bengtsson, Linus and Bodnar, Todd J. and Brewer, Devon D. and Brownstein, John S. and Buckee, Caroline and Campbell, Ellsworth M. and Cattuto, Ciro and Khandelwal, Shashank and Mabry, Patricia L. and Vespignani, Alessandro},
  doi = {10.1371/journal.pcbi.1002616},
  editor = {Bourne, Philip E.},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Salath{\'{e}} et al. 
- 2012 - Digital epidemiology.pdf:pdf},
  issn = {1553734X},
  journal = {PLoS Computational Biology},
  keywords = {big data,webcrawl},
  mendeley-tags = {big data,webcrawl},
  month = jul,
  number = {7},
  pages = {1--5},
  pmid = {22844241},
  publisher = {Public Library of Science},
  title = {{Digital epidemiology}},
  url = {http://dx.plos.org/10.1371/journal.pcbi.1002616},
  volume = {8},
  year = {2012}
}

% Lazer et al. (2014): "The Parable of Google Flu: Traps in Big Data Analysis" (Science 343).
@article{Lazer2014,
  abstract = {In February 2013, Google Flu Trends (GFT) made headlines but not for a reason that Google executives or the creators of the flu tracking system would have hoped. Nature reported that GFT was predicting more than double the proportion of doctor visits for influenza-like illness (ILI) than the Centers for Disease Control and Prevention (CDC), which bases its estimates on surveillance reports from laboratories across the United States (1, 2). This happened despite the fact that GFT was built to predict CDC reports. Given that GFT is often held up as an exemplary use of big data (3, 4), what lessons can we draw from this error?},
  author = {Lazer, David and Kennedy, Ryan and King, Gary and Vespignani, Alessandro},
  doi = {10.1126/science.1248506},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Lazer et al. 
- 2014 - The Parable of Google Flu Traps in Big Data Analysis.pdf:pdf},
  isbn = {0036-8075},
  issn = {1095-9203},
  journal = {Science},
  keywords = {google flu,syndromic surveillance,webcrawl},
  mendeley-tags = {google flu,syndromic surveillance,webcrawl},
  number = {6176},
  pages = {1203--1205},
  pmid = {24626916},
  title = {{The Parable of Google Flu: Traps in Big Data Analysis}},
  url = {http://www.sciencemag.org/content/343/6176/1203},
  volume = {343},
  year = {2014}
}

% Zhang et al. (2016): extracting signals from social media for asthma
% surveillance (Digital Health Conference 2016).
@inproceedings{Zhang2016,
  abstract = {Asthma is a chronic disease that affects people of all ages, and is a serious health and economic concern worldwide. However, accurate and timely surveillance and predicting hospital visits could allow for targeted interventions and reduce the societal burden of asthma. Current national asthma disease surveillance systems can have data availability lags of up to months and years. Rapid progress has been made in gathering social media data to perform disease surveillance and prediction. We introduce novel methods for extracting signals from social media data to assist in accurate and timely asthma surveillance. Our empirical analyses show that our methods are very effective for surveillance of asthma prevalence at both state and municipal levels. They are also useful for predicting the number of hospital visits based on near-real-Time social media data for specific geographic areas. 
Our results can be used for public health surveillance, ED preparedness, and targeted patient interventions.},
  author = {Zhang, W and Ram, S and Burkart, M and Pengetnze, Y},
  booktitle = {DH 2016 - Proceedings of the 2016 Digital Health Conference},
  doi = {10.1145/2896338.2896340},
  isbn = {9781450342247},
  title = {{Extracting signals from social media for chronic disease surveillance}},
  url = {http://dl.acm.org/citation.cfm?id=2897728},
  year = {2016}
}

% Huang et al. (2015): review of Big Data computing in health sciences (Big Data Research 2).
@article{Huang2015,
  abstract = {{\textcopyright} 2015 Elsevier Inc. With the development of smart devices and cloud computing, more and more public health data can be collected from various sources and can be analyzed in an unprecedented way. The huge social and academic impact of such developments caused a worldwide buzz for big data. In this review article, we summarized the latest applications of Big Data in health sciences, including the recommendation systems in healthcare, Internet-based epidemic surveillance, sensor-based health conditions and food safety monitoring, Genome-Wide Association Studies (GWAS) and expression Quantitative Trait Loci (eQTL), inferring air quality using big data and metabolomics and ionomics for nutritionists. We also reviewed the latest technologies of big data collection, storage, transferring, and the state-of-the-art analytical methods, such as Hadoop distributed file system, MapReduce, recommendation system, deep learning and network Analysis. At last, we discussed the future perspectives of health sciences in the era of Big Data.},
  author = {Huang, Tao and Lan, Liang and Fang, Xuexian and An, Peng and Min, Junxia and Wang, Fudi},
  doi = {10.1016/j.bdr.2015.02.002},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Huang et al. 
- 2015 - Promises and Challenges of Big Data Computing in Health Sciences.pdf:pdf},
  issn = {22145796},
  journal = {Big Data Research},
  keywords = {big data},
  mendeley-tags = {big data},
  month = mar,
  number = {1},
  pages = {2--11},
  title = {{Promises and Challenges of Big Data Computing in Health Sciences}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S2214579615000118},
  volume = {2},
  year = {2015}
}

% Heitmueller et al. (2014): public policy for the use of big data in health care (Health Affairs 33).
% The key follows the file's AuthorYear scheme; a citation key must not contain whitespace.
@article{Heitmueller2014,
  abstract = {The vast amount of health data generated and stored around the world each day offers significant opportunities for advances such as the real-time tracking of diseases, predicting disease outbreaks, and developing health care that is truly personalized. However, capturing, analyzing, and sharing health data is difficult, expensive, and controversial. This article explores four central questions that policy makers should consider when developing public policy for the use of "big data" in health care. We discuss what aspects of big data are most relevant for health care and present a taxonomy of data types and levels of access. We suggest that successful policies require clear objectives and provide examples, discuss barriers to achieving policy objectives based on a recent policy experiment in the United Kingdom, and propose levers that policy makers should consider using to advance data sharing. We argue that the case for data sharing can be won only by providing real-life examples of the ways in which it can improve health care.},
  author = {Heitmueller, Axel and Henderson, Sarah and Warburton, Will and Elmagarmid, Ahmed and Pentland, Alex S. and Darzi, Ara},
  doi = {10.1377/hlthaff.2014.0771},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Heitmueller et al. 
- 2014 - Developing Public Policy To Advance The Use Of Big Data In Health Care.pdf:pdf},
  isbn = {0278-2715},
  issn = {0278-2715},
  journal = {Health Affairs},
  keywords = {Big Data,Global Health,Health Policy,big data,policy},
  mendeley-tags = {big data,policy},
  month = sep,
  number = {9},
  pages = {1523--1530},
  pmid = {25201656},
  title = {{Developing Public Policy To Advance The Use Of Big Data In Health Care}},
  url = {http://content.healthaffairs.org/cgi/doi/10.1377/hlthaff.2014.0771},
  volume = {33},
  year = {2014}
}

% Kao et al. (2014): whole-genome sequencing and big data in epidemiology
% (Trends in Microbiology 22).
@article{Kao2014,
  abstract = {In epidemiology, the identification of 'who infected whom' allows us to quantify key characteristics such as incubation periods, heterogeneity in transmission rates, duration of infectiousness, and the existence of high-risk groups. Although invaluable, the existence of many plausible infection pathways makes this difficult, and epidemiological contact tracing either uncertain, logistically prohibitive, or both. The recent advent of next-generation sequencing technology allows the identification of traceable differences in the pathogen genome that are transforming our ability to understand high-resolution disease transmission, sometimes even down to the host-to-host scale. We review recent examples of the use of pathogen whole-genome sequencing for the purpose of forensic tracing of transmission pathways, focusing on the particular problems where evolutionary dynamics must be supplemented by epidemiological information on the most likely timing of events as well as possible transmission pathways. We also discuss potential pitfalls in the over-interpretation of these data, and highlight the manner in which a confluence of this technology with sophisticated mathematical and statistical approaches has the potential to produce a paradigm shift in our understanding of infectious disease transmission and control. {\textcopyright} 2014 Elsevier Ltd.},
  author = {Kao, Rowland R. 
and Haydon, Daniel T. and Lycett, Samantha J. and Murcia, Pablo R.},
  doi = {10.1016/j.tim.2014.02.011},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kao et al. - 2014 - Supersize me How whole-genome sequencing and big data are transforming epidemiology.pdf:pdf},
  isbn = {0966-842X},
  issn = {18784380},
  journal = {Trends in Microbiology},
  keywords = {Bayesian inference,Forensic epidemiology,Mathematical modeling,Pathogen evolution,Who-infected-whom?,big data,ddds,wgs},
  mendeley-tags = {big data,ddds,wgs},
  number = {5},
  pages = {282--291},
  pmid = {24661923},
  publisher = {Elsevier Ltd},
  title = {{Supersize me: How whole-genome sequencing and big data are transforming epidemiology}},
  url = {http://dx.doi.org/10.1016/j.tim.2014.02.011},
  volume = {22},
  year = {2014}
}

% Manogaran and Lopez (2017): big-data surveillance system relating climate
% data to Dengue (Int. J. of Ambient Computing and Intelligence 8).
@article{Manogaran2017,
  abstract = {{\textcopyright} 2017, IGI Global. Ambient intelligence is an emerging platform that provides advances in sensors and sensor networks, pervasive computing, and artificial intelligence to capture the real time climate data. This result continuously generates several exabytes of unstructured sensor data and so it is often called big climate data. Nowadays, researchers are trying to use big climate data to monitor and predict the climate change and possible diseases. Traditional data processing techniques and tools are not capable of handling such huge amount of climate data. Hence, there is a need to develop advanced big data architecture for processing the real time climate data. The purpose of this paper is to propose a big data based surveillance system that analyzes spatial climate big data and performs continuous monitoring of correlation between climate change and Dengue. 
Proposed disease surveillance system has been implemented with the help of Apache Hadoop MapReduce and its supporting tools.},
  author = {Manogaran, Gunasekaran and Lopez, Daphne},
  doi = {10.4018/IJACI.2017040106},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Manogaran, Lopez - 2017 - Disease Surveillance System for Big Climate Data Processing and Dengue Transmission.pdf:pdf},
  issn = {1941-6237},
  journal = {International Journal of Ambient Computing and Intelligence},
  keywords = {big data},
  mendeley-tags = {big data},
  month = apr,
  number = {2},
  pages = {88--105},
  title = {{Disease Surveillance System for Big Climate Data Processing and Dengue Transmission}},
  url = {http://services.igi-global.com/resolvedoi/resolve.aspx?doi=10.4018/IJACI.2017040106},
  volume = {8},
  year = {2017}
}

% Gittelman et al. (2015): Facebook likes as a data source for public health
% surveillance (Journal of Medical Internet Research 17).
@article{Gittelman2015,
  abstract = {{\textcopyright} Steven Gittelman, Victor Lange, Carol A Gotway Crawford, Catherine A Okoro, Eugene Lieb, Satvinder S Dhingra, Elaine Trimarchi. Background: Investigation into personal health has become focused on conditions at an increasingly local level, while response rates have declined and complicated the process of collecting data at an individual level. Simultaneously, social media data have exploded in availability and have been shown to correlate with the prevalence of certain health conditions. Objective: Facebook likes may be a source of digital data that can complement traditional public health surveillance systems and provide data at a local level. We explored the use of Facebook likes as potential predictors of health outcomes and their behavioral determinants. Methods: We performed principal components and regression analyses to examine the predictive qualities of Facebook likes with regard to mortality, diseases, and lifestyle behaviors in 214 counties across the United States and 61 of 67 counties in Florida. These results were compared with those obtainable from a demographic model. Health data were obtained from both the 2010 and 2011 Behavioral Risk Factor Surveillance System (BRFSS) and mortality data were obtained from the National Vital Statistics System. Results: Facebook likes added significant value in predicting most examined health outcomes and behaviors even when controlling for age, race, and socioeconomic status, with model fit improvements (adjusted R2) of an average of 58{\%} across models for 13 different health-related metrics over basic sociodemographic models. Small area data were not available in sufficient abundance to test the accuracy of the model in estimating health conditions in less populated markets, but initial analysis using data from Florida showed a strong model fit for obesity data (adjusted R2=.77). Conclusions: Facebook likes provide estimates for examined health outcomes and health behaviors that are comparable to those obtained from the BRFSS. Online sources may provide more reliable, timely, and cost-effective county-level data than that obtainable from traditional public health surveillance systems as well as serve as an adjunct to those systems.},
  author = {Gittelman, Steven and Lange, Victor and {Gotway Crawford}, Carol A and Okoro, Catherine A and Lieb, Eugene and Dhingra, Satvinder S and Trimarchi, Elaine},
  doi = {10.2196/jmir.3970},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Gittelman et al. - 2015 - A New Source of Data for Public Health Surveillance Facebook Likes.pdf:pdf},
  issn = {1438-8871},
  journal = {Journal of Medical Internet Research},
  keywords = {social media},
  mendeley-tags = {social media},
  month = apr,
  number = {4},
  pages = {e98},
  title = {{A New Source of Data for Public Health Surveillance: Facebook Likes}},
  url = {http://www.jmir.org/2015/4/e98/},
  volume = {17},
  year = {2015}
}

% Hay et al. (2013): big data opportunities for global infectious disease
% surveillance (PLoS Medicine 10).
@article{Hay2013,
  author = {Hay, Simon I. and George, Dylan B. and Moyes, Catherine L. and Brownstein, John S. 
and Flaxman, A. D.},
  doi = {10.1371/journal.pmed.1001413},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Hay et al. - 2013 - Big Data Opportunities for Global Infectious Disease Surveillance.pdf:pdf},
  issn = {1549-1676},
  journal = {PLoS Medicine},
  keywords = {big data,biosurveillance,ddds},
  mendeley-tags = {big data,biosurveillance,ddds},
  month = {apr},
  number = {4},
  pages = {e1001413},
  publisher = {Public Library of Science},
  title = {{Big Data Opportunities for Global Infectious Disease Surveillance}},
  url = {http://dx.plos.org/10.1371/journal.pmed.1001413},
  volume = {10},
  year = {2013}
}

@article{Hoffman2013,
  abstract = {The accelerating adoption of electronic health record (EHR) systems will have far-reaching implications for public health research and surveillance, which in turn could lead to changes in public policy, statutes, and regulations. The public health benefits of EHR use can be significant. However, researchers and analysts who rely on EHR data must proceed with caution and understand the potential limitations of EHRs. Because of clinicians' workloads, poor user-interface design, and other factors, EHR data can be erroneous, miscoded, fragmented, and incomplete. In addition, public health findings can be tainted by the problems of selection bias, confounding bias, and measurement bias. These flaws may become all the more troubling and important in an era of electronic "big data," in which a massive amount of information is processed automatically, without human checks. Thus, we conclude the paper by outlining several regulatory and other interventions to address data analysis difficulties that could result in invalid conclusions and unsound public health policies. 
{\textcopyright} 2013 American Society of Law, Medicine {\&} Ethics, Inc.},
  author = {Hoffman, Sharona and Podgurski, Andy},
  doi = {10.1111/jlme.12040},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Hoffman, Podgurski - 2013 - Big Bad Data Law, Public Health, and Biomedical Databases.pdf:pdf},
  issn = {10731105},
  journal = {The Journal of Law, Medicine {\&} Ethics},
  keywords = {big data,policy},
  mendeley-tags = {big data,policy},
  month = {mar},
  number = {SUPPL. 1},
  pages = {56--60},
  title = {{Big Bad Data: Law, Public Health, and Biomedical Databases}},
  url = {http://doi.wiley.com/10.1111/jlme.12040},
  volume = {41},
  year = {2013}
}

@article{Simonsen2016,
  abstract = {{\textcopyright} The Author 2016.While big data have proven immensely useful in fields such as marketing and earth sciences, public health is still relying on more traditional surveillance systems and awaiting the fruits of a big data revolution. A new generation of big data surveillance systems is needed to achieve rapid, flexible, and local tracking of infectious diseases, especially for emerging pathogens. In this opinion piece, we reflect on the long and distinguished history of disease surveillance and discuss recent developments related to use of big data. We start with a brief review of traditional systems relying on clinical and laboratory reports.We then examine how large-volume medical claims data can, with great spatiotemporal resolution, help elucidate local disease patterns. Finally, we review efforts to develop surveillance systems based on digital and social data streams, including the recent rise and fall of Google Flu Trends. We conclude by advocating for increased use of hybrid systems combining information from traditional surveillance and big data sources, which seems the most promising option moving forward. 
Throughout the article, we use influenza as an exemplar of an emerging and reemerging infection which has traditionally been considered a model system for surveillance and modeling.},
  author = {Simonsen, Lone and Gog, Julia R and Olson, Don and Viboud, C{\'{e}}cile},
  doi = {10.1093/infdis/jiw376},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Simonsen et al. - 2016 - Infectious Disease Surveillance in the Big Data Era Towards Faster and Locally Relevant Systems.pdf:pdf},
  issn = {0022-1899},
  journal = {Journal of Infectious Diseases},
  keywords = {big data,ddds},
  mendeley-tags = {big data,ddds},
  month = {dec},
  number = {suppl 4},
  pages = {S380--S385},
  title = {{Infectious Disease Surveillance in the Big Data Era: Towards Faster and Locally Relevant Systems}},
  url = {https://academic.oup.com/jid/article-lookup/doi/10.1093/infdis/jiw376},
  volume = {214},
  year = {2016}
}

@article{Vallmuur2016,
  abstract = {{\textcopyright} 2016, BMJ Publishing Group. All rights reserved.Objective Vast amounts of injury narratives are collected daily and are available electronically in real time and have great potential for use in injury surveillance and evaluation. Machine learning algorithms have been developed to assist in identifying cases and classifying mechanisms leading to injury in a much timelier manner than is possible when relying on manual coding of narratives. The aim of this paper is to describe the background, growth, value, challenges and future directions of machine learning as applied to injury surveillance. Methods This paper reviews key aspects of machine learning using injury narratives, providing a case study to demonstrate an application to an established human-machine learning approach. Results The range of applications and utility of narrative text has increased greatly with advancements in computing techniques over time. 
Practical and feasible methods exist for semiautomatic classification of injury narratives which are accurate, efficient and meaningful. The human-machine learning approach described in the case study achieved high sensitivity and PPV and reduced the need for human coding to less than a third of cases in one large occupational injury database. Conclusions The last 20 years have seen a dramatic change in the potential for technological advancements in injury surveillance. Machine learning of `big injury narrative data' opens up many possibilities for expanded sources of data which can provide more comprehensive, ongoing and timely surveillance to inform future injury prevention policy and practice.},
  author = {Vallmuur, Kirsten and Marucci-Wellman, Helen R and Taylor, Jennifer A and Lehto, Mark and Corns, Helen L and Smith, Gordon S},
  doi = {10.1136/injuryprev-2015-041813},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Vallmuur et al. - 2016 - Harnessing information from injury narratives in the ‘big data' era understanding and applying machine learning.pdf:pdf},
  issn = {1353-8047},
  journal = {Injury Prevention},
  keywords = {classification,ddds,nlp},
  mendeley-tags = {classification,ddds,nlp},
  month = {apr},
  number = {Suppl 1},
  pages = {i34--i42},
  title = {{Harnessing information from injury narratives in the `big data' era: understanding and applying machine learning for injury surveillance}},
  url = {http://injuryprevention.bmj.com/lookup/doi/10.1136/injuryprev-2015-041813},
  volume = {22},
  year = {2016}
}

@article{Gates2015,
  abstract = {Reducing the burden of emerging and endemic infectious diseases on commercial livestock production systems will require the development of innovative technology platforms that enable information from diverse animal health resources to be collected, analyzed, and communicated in near real-time. 
In this paper, we review recent initiatives to leverage data routinely observed by farmers, production managers, veterinary practitioners, diagnostic laboratories, regulatory officials, and slaughterhouse inspectors for disease surveillance purposes. The most commonly identified challenges were (1) the lack of standardized systems for recording essential data elements within and between surveillance data streams, (2) the additional time required to collect data elements that are not routinely recorded by participants, (3) the concern over the sharing and use of business sensitive information with regulatory authorities and other data analysts, (4) the difficulty in developing sustainable incentives to maintain long-term program participation, and (5) the limitations in current methods for analyzing and reporting animal health information in a manner that facilitates actionable response. With the significant recent advances in information science, there are many opportunities to develop more sophisticated systems that meet national disease surveillance objectives, while still providing participants with valuable tools and feedback to manage routine animal health concerns.},
  annote = {From Duplicate 2 (Integrating novel data streams to support biosurveillance in commercial livestock production systems in developed countries: challenges and opportunities - Gates, M Carolyn; Holmstrom, Lindsey K; Biggers, Keith E; Beckham, Tammy R) And Duplicate 4 (Integrating novel data streams to support biosurveillance in commercial livestock production systems in developed countries: challenges and opportunities - Gates, M Carolyn; Holmstrom, Lindsey K; Biggers, Keith E; Beckham, Tammy R) RAYYAN-INCLUSION: {\{}"Fernanda"={\textgreater}true, "flavie.vial"={\textgreater}true{\}} | RAYYAN-LABELS: data collection From Duplicate 3 (Integrating Novel Data Streams to Support Biosurveillance in Commercial Livestock Production Systems in Developed Countries: Challenges and Opportunities - 
Gates, M. Carolyn; Holmstrom, Lindsey K.; Biggers, Keith E.; Beckham, Tammy R.) From Duplicate 2 (Integrating novel data streams to support biosurveillance in commercial livestock production systems in developed countries: challenges and opportunities - Gates, M Carolyn; Holmstrom, Lindsey K; Biggers, Keith E; Beckham, Tammy R) RAYYAN-INCLUSION: {\{}"Fernanda"={\textgreater}true, "flavie.vial"={\textgreater}true{\}} | RAYYAN-LABELS: data collection},
  author = {Gates, M. Carolyn and Holmstrom, Lindsey K. and Biggers, Keith E. and Beckham, Tammy R.},
  doi = {10.3389/fpubh.2015.00074},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Gates et al. - 2015 - Integrating novel data streams to support biosurveillance in commercial livestock production systems in developed.pdf:pdf},
  issn = {2296-2565},
  journal = {Frontiers in Public Health},
  keywords = {biosurveillance,data sources,ddds,epidemiology,infectious disease,information technology,livestock production,syndromic surveillance,veterinar,veterinary medicine},
  mendeley-tags = {data sources,ddds},
  month = {apr},
  pages = {74},
  title = {{Integrating novel data streams to support biosurveillance in commercial livestock production systems in developed countries: challenges and opportunities}},
  url = {http://journal.frontiersin.org/article/10.3389/fpubh.2015.00074/abstract},
  volume = {3},
  year = {2015}
}

@article{Mitchell2016,
  abstract = {{\textcopyright} 2016 The Authors.Numerous studies have attempted to model the effect of mass media on the transmission of diseases such as influenza; however, quantitative data on media engagement has until recently been difficult to obtain. With the recent explosion of `big data' coming from online social media and the like, large volumes of data on a population's engagement with mass media during an epidemic are becoming available to researchers. 
In this study, we combine an online dataset comprising millions of shared messages relating to influenza with traditional surveillance data on flu activity to suggest a functional form for the relationship between the two. Using this data, we present a simple deterministic model for influenza dynamics incorporating media effects, and show that such a model helps explain the dynamics of historical influenza outbreaks. Furthermore, through model selection we show that the proposed media function fits historical data better than other media functions proposed in earlier studies.},
  author = {Mitchell, Lewis and Ross, Joshua V.},
  doi = {10.1098/rsos.160481},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Mitchell, Ross - 2016 - A data-driven model for influenza transmission incorporating media effects.pdf:pdf},
  journal = {Royal Society Open Science},
  keywords = {data driven,modeling},
  mendeley-tags = {data driven,modeling},
  number = {10},
  title = {{A data-driven model for influenza transmission incorporating media effects}},
  volume = {3},
  year = {2016}
}

@article{VanderWaal2016,
  abstract = {The increasing availability and complexity of data has led to new opportunities and challenges in veterinary epidemiology around how to translate abundant, diverse, and rapidly growing `big' data into meaningful insights for animal health. Big data analytics are used to understand health risks and minimize the impact of adverse animal health issues through identifying high-risk populations, combining data or processes acting at multiple scales through epidemiological modeling approaches, and harnessing high velocity data to monitor animal health trends and detect emerging health threats. The advent of big data requires the incorporation of new skills into veterinary epidemiology training, including, for example, machine learning and coding, in order to prepare a new generation of scientists and practitioners to engage with big data. 
Establishing pipelines to analyze big data in near real-time is the next step for progressing from simply having `big data' to creating `smart data', with the objective of improving understanding of health risks, effectiveness of management and policy decisions, and ultimately preventing or at least minimizing the impact of adverse animal health issues.},
  author = {VanderWaal, Kimberly and Morrison, Robert B and Neuhauser, Claudia and Vilalta, Carles and Perez, Andres M},
  doi = {10.3389/FVETS.2017.00110},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/VanderWaal et al. - 2016 - Translating Big Data into Smart Data for Veterinary Epidemiology.pdf:pdf},
  issn = {2297-1769},
  journal = {Frontiers in Veterinary Science},
  keywords = {Animal Movement,Modeling and Simulation,big data,machine learning,surveillance},
  mendeley-tags = {big data},
  pages = {110},
  publisher = {Frontiers},
  title = {{Translating Big Data into Smart Data for Veterinary Epidemiology}},
  url = {http://journal.frontiersin.org/article/10.3389/fvets.2017.00110/abstract},
  volume = {4},
  year = {2017}
}

@article{Guernier2016,
  abstract = {{\textcopyright} 2016 Guernier et al.Background: Tick paralysis, resultant from envenomation by the scrub-tick Ixodes holocyclus, is a serious threat for small companion animals in the eastern coast of Australia. We hypothesise that surveillance systems that are built on Internet search queries may provide a more timely indication of high-risk periods more effectively than current approaches. Methods: Monthly tick paralysis notifications in dogs and cats across Australia and the states of Queensland (QLD) and New South Wales (NSW) were retrieved from Disease WatchDog surveillance system for the period 2011-2013. Internet search terms related to tick paralysis in small companion animals were identified using Google Correlate, and corresponding search frequency metrics were downloaded from Google Trends. 
Spearman's rank correlations and time series cross correlations were performed to assess which Google search terms lead or are synchronous with tick paralysis notifications. Results: Metrics data were available for 24 relevant search terms at national level, 16 for QLD and 18 for NSW, and they were all significantly correlated with tick paralysis notifications (P {\textless} 0.05). Among those terms, 70.8, 56.3 and 50 {\%} showed strong Spearman's correlations, at national level, for QLD, and for NSW respectively, and cross correlation analyses identified searches which lead notifications at national or state levels. Conclusion: This study demonstrates that Internet search metrics can be used to monitor the occurrence of tick paralysis in companion animals, which would facilitate early detection of high-risk periods for tick paralysis cases. This study constitutes the first application of the rapidly emerging field of Internet-based surveillance to veterinary science.},
  author = {Guernier, Vanina and Milinovich, Gabriel J. and {Bezerra Santos}, Marcos Antonio and Haworth, Mark and Coleman, Glen and {Soares Magalh{\~{a}}es}, Ricardo J.},
  doi = {10.1186/s13071-016-1590-6},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Guernier et al. 
- 2016 - Use of big data in the surveillance of veterinary diseases Early detection of tick paralysis in companion anima.pdf:pdf},
  issn = {1756-3305},
  journal = {Parasites and Vectors},
  month = {dec},
  number = {1},
  pages = {303},
  title = {{Use of big data in the surveillance of veterinary diseases: Early detection of tick paralysis in companion animals}},
  url = {http://parasitesandvectors.biomedcentral.com/articles/10.1186/s13071-016-1590-6},
  volume = {9},
  year = {2016}
}

@article{Santillana2017,
  author = {Santillana, Mauricio},
  doi = {10.1093/cid/ciw660},
  issn = {1058-4838},
  journal = {Clinical Infectious Diseases},
  month = {jan},
  number = {1},
  pages = {42--43},
  title = {{Editorial Commentary: Perspectives on the Future of Internet Search Engines and Biosurveillance Systems}},
  url = {https://academic.oup.com/cid/article-lookup/doi/10.1093/cid/ciw660},
  volume = {64},
  year = {2017}
}

@article{Gange2016,
  abstract = {{\textcopyright} The Author 2015. Published by Oxford University Press on behalf of the Johns Hopkins Bloomberg School of Public Health.For more than a century, epidemiology has seen major shifts in both focus and methodology. Taking into consideration the explosion of "big data," the advent of more sophisticated data collection and analytical tools, and the increased interest in evidence-based solutions, we present a framework that summarizes 3 fundamental domains of epidemiologic methods that are relevant for the understanding of both historical contributions and future directions in public health. First, the manner in which populations and their follow-up are defined is expanding, with greater interest in online populations whose definition does not fit the usual classification by person, place, and time. Second, traditional data collection methods, such as population-based surveillance and individual interviews, have been supplemented with advances in measurement. 
From biomarkers to mobile health, innovations in the measurement of exposures and diseases enable refined accuracy of data collection. Lastly, the comparison of populations is at the heart of epidemiologic methodology. Risk factor epidemiology, prediction methods, and causal inference strategies are areas in which the field is continuing to make significant contributions to public health. The framework presented herein articulates the multifaceted ways in which epidemiologic methods make such contributions and can continue to do so as we embark upon the next 100 years.},
  author = {Gange, Stephen J. and Golub, Elizabeth T.},
  doi = {10.1093/aje/kwv150},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Gange, Golub - 2016 - From smallpox to big data The next 100 years of epidemiologic methods.pdf:pdf},
  issn = {14766256},
  journal = {American Journal of Epidemiology},
  keywords = {inference,measurement,populations,synthesis},
  number = {5},
  pages = {423--426},
  pmid = {26443419},
  title = {{From smallpox to big data: The next 100 years of epidemiologic methods}},
  volume = {183},
  year = {2016}
}

@article{Milinovich2015,
  author = {Milinovich, Gabriel J and {Soares Magalh{\~{a}}es}, Ricardo J and Hu, Wenbiao},
  doi = {10.1016/S2214-109X(14)70356-0},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Milinovich, Magalh{\~{a}}es, Hu - 2015 - Role of big data in the early detection of Ebola and other emerging infectious diseases.pdf:pdf},
  issn = {2214109X},
  journal = {The Lancet Global Health},
  keywords = {webcrawl},
  mendeley-tags = {webcrawl},
  month = {jan},
  number = {1},
  pages = {e20--e21},
  title = {{Role of big data in the early detection of Ebola and other emerging infectious diseases}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S2214109X14703560},
  volume = {3},
  year = {2015}
}

@article{Khoury2015,
  abstract = {{\textcopyright} 2015 The 
Author. Published by Oxford University Press on behalf of the Johns Hopkins Bloomberg School of Public Health. All rights reserved.We live in the era of genomics and big data. Evaluating the impact on health of large-scale biological, social, and environmental data is an emerging challenge in the field of epidemiology. In the past 3 years, major discussions and plans for the future of epidemiology, including with several recommendations for actions to transform the field, have been launched by 2 institutes within the National Institutes of Health. In the present commentary, I briefly explore the themes of these recommendations and their effects on leadership, resources, cohort infrastructure, and training. Ongoing engagement within the epidemiology community is needed to determine how to shape the evolution of the field and what truly matters for changing population health. We also need to assess how to leverage existing epidemiology resources and develop new studies to improve human health. Readers are invited to examine these recommendations, consider others that might be important, and join in the conversation about the future of epidemiology.},
  author = {Khoury, Muin J.},
  doi = {10.1093/aje/kwv228},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Khoury - 2015 - Planning for the Future of Epidemiology in the Era of Big Data and Precision Medicine.pdf:pdf},
  journal = {American Journal of Epidemiology},
  keywords = {big data,epidemiology,funding,genomics,precision medicine,training},
  mendeley-tags = {big data},
  number = {12},
  pages = {977--979},
  title = {{Planning for the Future of Epidemiology in the Era of Big Data and Precision Medicine}},
  volume = {182},
  year = {2015}
}

@article{Salathe2016,
  abstract = {{\textcopyright} The Author 2016.The digital revolution has contributed to very large data sets (ie, big data) relevant for public health. The two major data sources are electronic health records from traditional health systems and patient-generated data. As the two data sources have complementary strengths-high veracity in the data from traditional sources and high velocity and variety in patient-generated data-they can be combined to build more-robust public health systems. However, they also have unique challenges. Patient-generated data in particular are often completely unstructured and highly context dependent, posing essentially a machine-learning challenge. Some recent examples from infectious disease surveillance and adverse drug event monitoring demonstrate that the technical challenges can be solved. 
Despite these advances, the problem of verification remains, and unless traditional and digital epidemiologic approaches are combined, these data sources will be constrained by their intrinsic limits.},
  author = {Salath{\'{e}}, Marcel},
  doi = {10.1093/infdis/jiw281},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Salath{\'{e}} - 2016 - Digital pharmacovigilance and disease surveillance Combining traditional and big-data systems for better public health.pdf:pdf},
  issn = {0022-1899},
  journal = {Journal of Infectious Diseases},
  keywords = {big data},
  mendeley-tags = {big data},
  month = {dec},
  number = {suppl 4},
  pages = {S399--S403},
  title = {{Digital pharmacovigilance and disease surveillance: Combining traditional and big-data systems for better public health}},
  url = {https://academic.oup.com/jid/article-lookup/doi/10.1093/infdis/jiw281},
  volume = {214},
  year = {2016}
}

@article{Lee2016,
  abstract = {{\textcopyright} The Author 2016.Spatial big data have the velocity, volume, and variety of big data sources and contain additional geographic information. Digital data sources, such as medical claims, mobile phone call data records, and geographically tagged tweets, have entered infectious diseases epidemiology as novel sources of data to complement traditional infectious disease surveillance. In this work, we provide examples of how spatial big data have been used thus far in epidemiological analyses and describe opportunities for these sources to improve disease-mitigation strategies and public health coordination. In addition, we consider the technical, practical, and ethical challenges with the use of spatial big data in infectious disease surveillance and inference. 
Finally, we discuss the implications of the rising use of spatial big data in epidemiology to health risk communication, and public health policy recommendations and coordination across scales.},
  archivePrefix = {arXiv},
  arxivId = {1605.08740},
  author = {Lee, Elizabeth C. and Asher, Jason M. and Goldlust, Sandra and Kraemer, John D. and Lawson, Andrew B. and Bansal, Shweta},
  doi = {10.1093/infdis/jiw344},
  eprint = {1605.08740},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Lee et al. - 2016 - Mind the Scales Harnessing Spatial Big Data for Infectious Disease Surveillance and Inference.pdf:pdf;:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Lee et al. - 2016 - Mind the scales Harnessing spatial big data for infectious disease surveillance and inference(2).pdf:pdf},
  issn = {0022-1899},
  journal = {Journal of Infectious Diseases},
  keywords = {big data,digital epidemiology,disease mapping,infectious diseases,spatial,spatial big data,spatial epidemiology,statistical bias},
  mendeley-tags = {big data,spatial},
  number = {Suppl 4},
  pages = {S409--S413},
  title = {{Mind the scales: Harnessing spatial big data for infectious disease surveillance and inference}},
  volume = {214},
  year = {2016}
}

@inproceedings{Othman2016,
  abstract = {{\textcopyright} 2016 IEEE.This paper introduces Dengue Active Surveillance System (DASS) framework for an early warning system of the outbreak. Dengue and dengue hemorrhagic fever are emerging as major public health problems in most Asian countries such as Malaysia. Effective prevention and control programs will depend on improved surveillance. A new approach to active surveillance outlined with emphasis on the inter-epidemic period. 
The objective is to develop an early warning surveillance system (framework) that can predict epidemic dengue to improve current passive surveillance system available in Malaysia. Basically, the framework introduced data harvesting process from multiple sources as input, data pre-processing using data aggregator and filtering engine, storing large data in repository, analytic engine for analysis and processing the large data, and presentation of the information to the users. The data harvested from two major sources such as weather or flood information, and social media such as build development and dengue symptom using system API, SOAP and others. The data aggregator will aggregate the data from three different types of data such as structured, semi-structured and unstructured data to be stored into the semi-structured database such as MongoDB and NoSQL. The data parse to the filtering engine for filtering and cleaning the data sources using suitable keywords prior to store it in the large data repository. After that, the large data will be processed and analyzed using algorithm or mathematical calculation to determine the expected dengue cases. Then, the processed information will be presented to the users in a form of web or mobile application and other method, for example, short message service (SMS). Finally, the system accuracy will be evaluated based on the comparison study with the traditional passive system.},
  author = {Othman, Mohd Khalit and Danuri, Mohd Shahrul Nizam Mohd},
  booktitle = {2016 International Conference on Information and Communication Technology (ICICTM)},
  doi = {10.1109/ICICTM.2016.7890783},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Othman, Danuri - 2016 - Proposed conceptual framework of Dengue Active Surveillance System (DASS) in Malaysia.pdf:pdf},
  isbn = {978-1-5090-0412-6},
  keywords = {biosurveillance example},
  mendeley-tags = {biosurveillance example},
  pages = {90--96},
  publisher = {IEEE},
  title = {{Proposed conceptual framework of Dengue Active Surveillance System (DASS) in Malaysia}},
  url = {http://ieeexplore.ieee.org/document/7890783/},
  year = {2016}
}

@article{Oshea2017,
  abstract = {{\textcopyright} 2017 Elsevier B.V.Background Internet access and usage has changed how people seek and report health information. Meanwhile,infectious diseases continue to threaten humanity. The analysis of Big Data, or vast digital data, presents an opportunity to improve disease surveillance and epidemic intelligence. Epidemic intelligence contains two components: indicator based and event-based. A relatively new surveillance type has emerged called event-based Internet biosurveillance systems. These systems use information on events impacting health from Internet sources, such as social media or news aggregates. These systems circumvent the limitations of traditional reporting systems by being inexpensive, transparent, and flexible. Yet, innovations and the functionality of these systems can change rapidly. Aim To update the current state of knowledge on event-based Internet biosurveillance systems by identifying all systems, including current functionality, with hopes to aid decision makers with whether to incorporate new methods into comprehensive programmes of surveillance. Methods A systematic review was performed through PubMed, Scopus, and Google Scholar databases, while also including grey literature and other publication types. 
Results 50 event-based Internet systems were identified, including an extraction of 15 attributes for each system, described in 99 articles. Each system uses different innovative technology and data sources to gather data, process, and disseminate data to detect infectious disease outbreaks. Conclusions The review emphasises the importance of using both formal and informal sources for timely and accurate infectious disease outbreak surveillance, cataloguing all event-based Internet biosurveillance systems. By doing so, future researchers will be able to use this review as a library for referencing systems, with hopes of learning, building, and expanding Internet-based surveillance systems. Event-based Internet biosurveillance should act as an extension of traditional systems, to be utilised as an additional, supplemental data source to have a more comprehensive estimate of disease burden.}, author = {{O 'shea}, Jesse and O'Shea, Jesse}, doi = {10.1016/j.ijmedinf.2017.01.019}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/O 'shea, O'Shea - 2017 - Digital disease detection A systematic review of event-based internet biosurveillance systems.pdf:pdf}, issn = {13865056}, journal = {International Journal of Medical Informatics}, keywords = {Biosurveillance,Disease surveillance,Public health,webcrawl}, mendeley-tags = {webcrawl}, month = {may}, pages = {15--22}, title = {{Digital disease detection: A systematic review of event-based internet biosurveillance systems}}, url = {http://linkinghub.elsevier.com/retrieve/pii/S1386505617300308 http://ac.els-cdn.com/S1386505617300308/1-s2.0-S1386505617300308-main.pdf?{\_}tid=9f3eb2fe-397b-11e7-a63c-00000aab0f27{\&}acdnat=1494859057{\_}fbabbd3631cf0a877cfa3dda69e731ab}, volume = {101}, year = {2017} } @article{Toh2013, abstract = {A recent assessment of drugs that target the renin-angiotensin-aldosterone system and angioedema risk drew from a source population of more than 100 
million people and 350 million person-years of observation time. The assessment identified 3.9 million eligible new users of angiotensin-converting enzyme inhibitors (ACEIs), angiotensin receptor blockers (ARBs), the direct renin inhibitor aliskiren, or the common referent group beta-blockers (a class of drugs not thought to affect the risk of angioedema). More than 4500 outcome events were observed.1 The assessment replicated a well-known association between ACEIs and angioedema,2–4 but the risk estimates were much more precise than those from prior studies. The assessment also generated new evidence for ARBs and aliskiren. Not so long ago, an assessment of such scale existed only in our imaginations. Secondary uses of routinely collected electronic health information now enable us to conduct research using data from hundreds of thousands or even millions of patients.5 But certain studies or surveillance activities, especially those with rare exposure or outcome, demand data larger than any single extant source. Combining data from multiple sources would help solve the sample size problem, but sharing data has always been a challenge because of privacy, security, regulatory, legal, and proprietary concerns. 
How did the angioedema assessment accomplish this and what implications does it have for epidemiology?},
  author = {Toh, Sengwee and Platt, Richard},
  doi = {10.1097/EDE.0b013e31828ac65e},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Toh, Platt - 2013 - Is Size the Next Big Thing in Epidemiology.pdf:pdf},
  issn = {1044-3983},
  journal = {Epidemiology},
  keywords = {big data},
  mendeley-tags = {big data},
  month = may,
  number = {3},
  pages = {349--351},
  pmid = {23549179},
  title = {{Is Size the Next Big Thing in Epidemiology?}},
  url = {http://content.wkhealth.com/linkback/openurl?sid=WKPTLP:landingpage{\&}an=00001648-201305000-00004},
  volume = {24},
  year = {2013}
}

@article{Andreu-Perez2015,
  abstract = {This paper provides an overview of recent developments in big data in the context of biomedical and health informatics. It outlines the key characteristics of big data and how medical and health informatics, translational bioinformatics, sensor informatics and imaging informatics will benefit from an integrated approach of piecing together different aspects of personalized information from a diverse range of data sources, both structured and unstructured, covering genomics, proteomics, metabolomics, as well as imaging, clinical diagnosis, and long-term continuous physiological sensing of an individual. It is expected that recent advances in big data will expand our knowledge for testing new hypotheses about disease management, from diagnosis, to prevention to personalized treatment. The rise of big data, however, also raises challenges in terms of privacy, security, data ownership, data stewardship and governance. This paper discusses some of the existing activities and future opportunities related to big data for health, outlining some of the key underlying issues that need to be tackled.},
  author = {Andreu-Perez, Javier and Poon, Carmen C. Y.
and Merrifield, Robert D. and Wong, Stephen T. C. and Yang, Guang-Zhong},
  doi = {10.1109/JBHI.2015.2450362},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Andreu-Perez et al. - 2015 - Big Data for Health.pdf:pdf},
  issn = {21682194},
  journal = {IEEE Journal of Biomedical and Health Informatics},
  keywords = {big data,bioinformatics,health informatics,medical imaging,medical informatics,precision medicine,sensor informatics,social health},
  mendeley-tags = {big data},
  month = jul,
  number = {4},
  pages = {1193--1208},
  pmid = {26173222},
  title = {{Big Data for Health}},
  url = {http://ieeexplore.ieee.org/document/7154395/},
  volume = {19},
  year = {2015}
}

@article{Davidson2015,
  abstract = {Seasonal influenza infects approximately 5-20{\%} of the U.S. population every year, resulting in over 200,000 hospitalizations. The ability to more accurately assess infection levels and predict which regions have higher infection risk in future time periods can instruct targeted prevention and treatment efforts, especially during epidemics. Google Flu Trends (GFT) has generated significant hope that 'big data' can be an effective tool for estimating disease burden and spread. The estimates generated by GFT come in real-time-two weeks earlier than traditional surveillance data collected by the U.S. Centers for Disease Control and Prevention (CDC). However, GFT had some infamous errors and is significantly less accurate at tracking laboratory-confirmed cases than syndromic influenza-like illness (ILI) cases. We construct an empirical network using CDC data and combine this with GFT to substantially improve its performance.
This improved model predicts infections one week into the future as well as GFT predicts the present and does particularly well in regions that are most likely to facilitate influenza spread and during epidemics.},
  author = {Davidson, Michael W. and Haim, Dotan A. and Radin, Jennifer M.},
  doi = {10.1038/srep08154},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Davidson, Haim, Radin - 2015 - Using Networks to Combine “Big Data” and Traditional Surveillance to Improve Influenza Predictions.pdf:pdf},
  issn = {2045-2322},
  journal = {Scientific Reports},
  keywords = {webcrawl},
  mendeley-tags = {webcrawl},
  month = jul,
  pages = {8154},
  title = {{Using Networks to Combine ``Big Data'' and Traditional Surveillance to Improve Influenza Predictions}},
  url = {http://www.nature.com/articles/srep08154},
  volume = {5},
  year = {2015}
}

@article{Chan2011,
  abstract = {A variety of obstacles including bureaucracy and lack of resources have interfered with timely detection and reporting of dengue cases in many endemic countries. Surveillance efforts have turned to modern data sources, such as Internet search queries, which have been shown to be effective for monitoring influenza-like illnesses. However, few have evaluated the utility of web search query data for other diseases, especially those of high morbidity and mortality or where a vaccine may not exist. In this study, we aimed to assess whether web search queries are a viable data source for the early detection and monitoring of dengue epidemics. Bolivia, Brazil, India, Indonesia and Singapore were chosen for analysis based on available data and adequate search volume. For each country, a univariate linear model was then built by fitting a time series of the fraction of Google search query volume for specific dengue-related queries from that country against a time series of official dengue case counts for a time-frame within 2003-2010.
The specific combination of queries used was chosen to maximize model fit. Spurious spikes in the data were also removed prior to model fitting. The final models, fit using a training subset of the data, were cross-validated against both the overall dataset and a holdout subset of the data. All models were found to fit the data quite well, with validation correlations ranging from 0.82 to 0.99.Web search query data were found to be capable of tracking dengue activity in Bolivia, Brazil, India, Indonesia and Singapore. Whereas traditional dengue data from official sources are often not available until after some substantial delay, web search query data are available in near real-time. These data represent valuable complement to assist with traditional dengue surveillance.}, author = {Chan, Emily H. and Sahai, Vikram and Conrad, Corrie and Brownstein, John S. and Kabra, SK}, doi = {10.1371/journal.pntd.0001206}, editor = {Aksoy, Serap}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chan et al. 
- 2011 - Using Web Search Query Data to Monitor Dengue Epidemics A New Model for Neglected Tropical Disease Surveillance.pdf:pdf},
  institution = {Children's Hospital Informatics Program, Harvard-Massachusetts Institute of Technology Division of Health Sciences and Technology, Boston, Massachusetts,USA.},
  issn = {1935-2735},
  journal = {PLoS Neglected Tropical Diseases},
  keywords = {Asia,Dengue,Disease Outbreaks,Humans,India,Internet,Models,Neglected Diseases,Population Surveillance,Public Health Informatics,Sentinel Surveillance,South America,Southeastern,Statistical,epidemiology,methods,webcrawl},
  mendeley-tags = {webcrawl},
  month = may,
  number = {5},
  pages = {e1206},
  pmid = {21647308},
  publisher = {Public Library of Science},
  title = {{Using Web Search Query Data to Monitor Dengue Epidemics: A New Model for Neglected Tropical Disease Surveillance}},
  url = {http://dx.doi.org/10.1371/journal.pntd.0001206},
  volume = {5},
  year = {2011}
}

@article{Chiolero2013,
  author = {Chiolero, Arnaud},
  doi = {10.1097/EDE.0b013e31829e46dc},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chiolero - 2013 - Big Data in Epidemiology too big to fail.pdf:pdf},
  issn = {1044-3983},
  journal = {Epidemiology},
  keywords = {big data},
  mendeley-tags = {big data},
  month = nov,
  number = {6},
  pages = {938--939},
  pmid = {24077000},
  title = {{Big Data in Epidemiology: too big to fail?}},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/24077000},
  volume = {24},
  year = {2013}
}

@article{Asokan2015,
  abstract = {{\textcopyright} 2015 Ministry of Health, Saudi Arabia. Zoonoses constitute 61{\%} of all known infectious diseases.
The major obstacles to control zoonoses include insensitive systems and unreliable data. Intelligent handling of the cost effective big data can accomplish the goals of one health to detect disease trends, outbreaks, pathogens and causes of emergence in human and animals.},
  author = {Asokan, G. V. and Asokan, Vanitha},
  doi = {10.1016/j.jegh.2015.02.001},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Asokan, Asokan - 2015 - Leveraging “big data” to enhance the effectiveness of “one health” in an era of health informatics.pdf:pdf},
  issn = {22106006},
  journal = {Journal of Epidemiology and Global Health},
  keywords = {Big data,Health informatics,One health,Zoonoses,big data,one health},
  mendeley-tags = {big data,one health},
  month = dec,
  number = {4},
  pages = {311--314},
  pmid = {25747185},
  title = {{Leveraging ``big data'' to enhance the effectiveness of ``one health'' in an era of health informatics}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S2210600615000283},
  volume = {5},
  year = {2015}
}

@article{Bansal2016,
  abstract = {{\textcopyright} The Author 2016. We devote a special issue of the Journal of Infectious Diseases to review the recent advances of big data in strengthening disease surveillance, monitoring medical adverse events, informing transmission models, and tracking patient sentiments and mobility. We consider a broad definition of big data for public health, one encompassing patient information gathered from high-volume electronic health records and participatory surveillance systems, as well as mining of digital traces such as social media, Internet searches, and cell-phone logs. We introduce nine independent contributions to this special issue and highlight several cross-cutting areas that require further research, including representativeness, biases, volatility, and validation, and the need for robust statistical and hypotheses-driven analyses.
Overall, we are optimistic that the big-data revolution will vastly improve the granularity and timeliness of available epidemiological information, with hybrid systems augmenting rather than supplanting traditional surveillance systems, and better prospects for accurate infectious diseases models and forecasts.},
  author = {Bansal, S and Chowell, G and Simonsen, L and Vespignani, A and Viboud, C},
  doi = {10.1093/infdis/jiw400},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Bansal et al. - 2016 - Big data for infectious disease surveillance and modeling.pdf:pdf},
  journal = {Journal of Infectious Diseases},
  keywords = {big data,modeling,surveillance},
  mendeley-tags = {big data,modeling,surveillance},
  title = {{Big data for infectious disease surveillance and modeling}},
  volume = {214},
  year = {2016}
}

@article{McCue2017,
  author = {McCue, Molly E. and McCoy, Annette M.},
  doi = {10.3389/fvets.2017.00194},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/McCue, McCoy - 2017 - The Scope of Big Data in One Medicine Unprecedented Opportunities and Challenges.pdf:pdf},
  issn = {2297-1769},
  journal = {Frontiers in Veterinary Science},
  keywords = {big data,bioinformatics,clinical informatics,deep phenotyping,environmental epidemiology,genetic epidemiology,multilayer disease module,network medicine,structural informatics},
  month = nov,
  pages = {1--23},
  title = {{The Scope of Big Data in One Medicine: Unprecedented Opportunities and Challenges}},
  url = {http://journal.frontiersin.org/article/10.3389/fvets.2017.00194/full},
  volume = {4},
  year = {2017}
}

@article{Wang2014,
  abstract = {BACKGROUND In the past few decades, medically related data collection saw a huge increase, referred to as big data.
These huge datasets bring challenges in storage, processing, and analysis. In clinical medicine, big data is expected to play an important role in identifying causality of patient symptoms, in predicting hazards of disease incidence or reoccurrence, and in improving primary-care quality. OBJECTIVE The objective of this review was to provide an overview of the features of clinical big data, describe a few commonly employed computational algorithms, statistical methods, and software toolkits for data manipulation and analysis, and discuss the challenges and limitations in this realm. METHODS We conducted a literature review to identify studies on big data in medicine, especially clinical medicine. We used different combinations of keywords to search PubMed, Science Direct, Web of Knowledge, and Google Scholar for literature of interest from the past 10 years. RESULTS This paper reviewed studies that analyzed clinical big data and discussed issues related to storage and analysis of this type of data. CONCLUSIONS Big data is becoming a common feature of biological and clinical studies. Researchers who use clinical big data face multiple challenges, and the data itself has limitations. 
It is imperative that methodologies for data analysis keep pace with our ability to collect and store data.},
  author = {Wang, Weiqi and Krishnan, Eswar},
  doi = {10.2196/medinform.2913},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Wang, Krishnan - 2014 - Big data and clinicians a review on the state of the science.pdf:pdf},
  issn = {2291-9694},
  journal = {JMIR Medical Informatics},
  keywords = {big data,clinical research,database,medical informatics,medicine},
  month = jan,
  number = {1},
  pages = {e1},
  pmid = {25600256},
  publisher = {JMIR Medical Informatics},
  title = {{Big data and clinicians: a review on the state of the science}},
  url = {http://www.medinform.jmir.org/2014/1/e1/},
  volume = {2},
  year = {2014}
}

@article{Manlove2016,
  abstract = {The One Health initiative is a global effort fostering interdisciplinary collaborations to address challenges in human, animal, and environmental health. While One Health has received considerable press, its benefits remain unclear because its effects have not been quantitatively described. We systematically surveyed the published literature and used social network analysis to measure interdisciplinarity in One Health studies constructing dynamic pathogen transmission models. The number of publications fulfilling our search criteria increased by 14.6{\%} per year, which is faster than growth rates for life sciences as a whole and for most biology subdisciplines. Surveyed publications clustered into three communities: one used by ecologists, one used by veterinarians, and a third diverse-authorship community used by population biologists, mathematicians, epidemiologists, and experts in human health. Overlap between these communities increased through time in terms of author number, diversity of co-author affiliations, and diversity of citations.
However, communities continue to differ in the systems studied, questions asked, and methods employed. While the infectious disease research community has made significant progress toward integrating its participating disciplines, some segregation-especially along the veterinary/ecological research interface-remains.},
  author = {Manlove, Kezia R. and Walker, Josephine G. and Craft, Meggan E. and Huyvaert, Kathryn P. and Joseph, Maxwell B. and Miller, Ryan S. and Nol, Pauline and Patyk, Kelly A. and O'Brien, Daniel and Walsh, Daniel P. and Cross, Paul C.},
  doi = {10.1371/journal.pbio.1002448},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Manlove et al. - 2016 - “One Health” or Three Publication Silos Among the One Health Disciplines.pdf:pdf},
  issn = {15457885},
  journal = {PLoS Biology},
  keywords = {one health},
  mendeley-tags = {one health},
  number = {4},
  pages = {1--14},
  pmid = {27100532},
  title = {{``One Health'' or Three? Publication Silos Among the One Health Disciplines}},
  volume = {14},
  year = {2016}
}

@article{Charles-Smith2015,
  abstract = {OBJECTIVE: Research studies show that social media may be valuable tools in the disease surveillance toolkit used for improving public health professionals' ability to detect disease outbreaks faster than traditional methods and to enhance outbreak response.
A social media work group, consisting of surveillance practitioners, academic researchers, and other subject matter experts convened by the International Society for Disease Surveillance, conducted a systematic primary literature review using the PRISMA framework to identify research, published through February 2013, answering either of the following questions: Can social media be integrated into disease surveillance practice and outbreak management to support and improve public health? Can social media be used to effectively target populations, specifically vulnerable populations, to test an intervention and interact with a community to improve health outcomes? Examples of social media included are Facebook, MySpace, microblogs (e.g., Twitter), blogs, and discussion forums. For Question 1, 33 manuscripts were identified, starting in 2009 with topics on Influenza-like Illnesses (n = 15), Infectious Diseases (n = 6), Non-infectious Diseases (n = 4), Medication and Vaccines (n = 3), and Other (n = 5). For Question 2, 32 manuscripts were identified, the first in 2000 with topics on Health Risk Behaviors (n = 10), Infectious Diseases (n = 3), Non-infectious Diseases (n = 9), and Other (n = 10). CONCLUSIONS: The literature on the use of social media to support public health practice has identified many gaps and biases in current knowledge. Despite the potential for success identified in exploratory studies, there are limited studies on interventions and little use of social media in practice. However, information gleaned from the articles demonstrates the effectiveness of social media in supporting and improving public health and in identifying target populations for intervention.
A primary recommendation resulting from the review is to identify opportunities that enable public health professionals to integrate social media analytics into disease surveillance and outbreak management practice.},
  archivePrefix = {arXiv},
  arxivId = {1401.1032},
  author = {Charles-Smith, Lauren E. and Reynolds, Tera L. and Cameron, Mark A. and Conway, Mike and Lau, Eric H. Y. and Olsen, Jennifer M. and Pavlin, Julie A. and Shigematsu, Mika and Streichert, Laura C. and Suda, Katie J. and Corley, Courtney D.},
  doi = {10.1371/journal.pone.0139701},
  eprint = {1401.1032},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Charles-Smith et al. - 2015 - Using social media for actionable disease surveillance and outbreak management A systematic literature rev.pdf:pdf},
  issn = {19326203},
  journal = {PLoS ONE},
  keywords = {social media},
  mendeley-tags = {social media},
  number = {10},
  pages = {1--20},
  pmid = {26437454},
  title = {{Using social media for actionable disease surveillance and outbreak management: A systematic literature review}},
  volume = {10},
  year = {2015}
}

@article{Larson2013,
  author = {Larson, Eric B.},
  doi = {10.1001/jama.2013.5914},
  issn = {0098-7484},
  journal = {JAMA},
  month = jun,
  number = {23},
  pages = {2443},
  title = {{Building Trust in the Power of ``Big Data'' Research to Serve the Public Good}},
  url = {http://jama.jamanetwork.com/article.aspx?doi=10.1001/jama.2013.5914},
  volume = {309},
  year = {2013}
}

@techreport{Habl2016,
  abstract = {The aim of the study on Big Data in Public Health, Telemedicine and Healthcare is to identify applicable examples of the use of Big Data in Health and develop recommendations for their implementation in the European Union. Examples of Big Data in Health were identified by a systematic literature review, after which the added value of twenty selected examples was evaluated.
Based on the assessment of the added value and the quality of the evidence, ten priority examples were selected. Furthermore, potential policy actions for the implementation of Big Data in Health were identified in the literature, and a SWOT analysis was conducted to check the feasibility of the proposed actions. Based on this analysis, and with the help of renowned experts, the study team developed ten policy recommendations in the field. These recommendations were validated through public consultations at three relevant conferences in Europe and were again reviewed by the Expert Group. The recommendations aim to benefit European citizens and patients in terms of strengthening their health and improving the performance of Member State's health systems. They should be seen as suggestions for the European Union and its Member States on how to utilise the strengths and exploit the opportunities of Big Data for Public Health without threatening privacy or safety of citizens. Recommendations were developed for ten relevant fields: awareness raising, education and training, data sources, open data and data sharing, applications and purposes, data analysis, governance of data access and use, standards, funding and financial resources, as well as legal aspects and privacy regulation.},
  author = {Habl, Claudia and Renner, Anna-Theresa and Bobek, Julia and Laschkolnig, Anja},
  doi = {10.2875/734795},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Habl et al.
- 2016 - Study on Big Data in Public Health, Telemedine and Healthcare.pdf:pdf},
  title = {{Study on Big Data in Public Health, Telemedicine and Healthcare}},
  year = {2016}
}

@techreport{EuropeanCommission2014,
  author = {{European Commission, Directorate-General for Health and Consumers}},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/European Commission - 2014 - The Use of Big Data in Public Health Policy and Research.pdf:pdf},
  institution = {European Commission},
  title = {{The Use of Big Data in Public Health Policy and Research}},
  url = {http://ec.europa.eu/health//sites/health/files/ehealth/docs/ev{\_}20141118{\_}co07b{\_}en.pdf},
  year = {2014}
}

@inproceedings{Kostkova2013,
  abstract = {The exponentially increasing stream of real time big data produced by Web 2.0 Internet and mobile networks created radically new interdisciplinary challenges for public health and computer science. Traditional public health disease surveillance systems have to utilize the potential created by new situationaware realtime signals from social media, mobile/sensor networks and citizens' participatory surveillance systems providing invaluable free realtime event-based signals for epidemic intelligence. However, rather than improving existing isolated systems, an integrated solution bringing together existing epidemic intelligence systems scanning news media (e.g., GPHIN, MedISys) with real-time social media intelligence (e.g., Twitter, participatory systems) is required to substantially improve and automate early warning, outbreak detection and preparedness operations. However, automatic monitoring and novel verification methods for these multichannel event-based real time signals has to be integrated with traditional case-based surveillance systems from microbiological laboratories and clinical reporting. Finally, the system needs effectively support coordination of epidemiological teams, risk communication with citizens and implementation of prevention measures.
However, from computational perspective, signal detection, analysis and verification of very high noise realtime big data provide a number of interdisciplinary challenges for computer science. Novel approaches integrating current systems into a digital public health dashboard can enhance signal verification methods and automate the processes assisting public health experts in providing better informed and more timely response. In this paper, we describe the roadmap to such a system, components of an integrated public health surveillance services and computing challenges to be resolved to create an integrated real world solution.},
  author = {Kostkova, P.},
  booktitle = {WWW 2013 Companion - Proceedings of the 22nd International Conference on World Wide Web},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kostkova - 2013 - A roadmap to integrated digital public health surveillance The vision and the challenges.pdf:pdf},
  isbn = {9781450320382},
  keywords = {ddds},
  mendeley-tags = {ddds},
  title = {{A roadmap to integrated digital public health surveillance: The vision and the challenges}},
  year = {2013}
}

@article{Mooney2015,
  abstract = {{\textcopyright} 2015 Wolters Kluwer Health, Inc. All rights reserved. Big Data has increasingly been promoted as a revolutionary development in the future of science, including epidemiology. However, the definition and implications of Big Data for epidemiology remain unclear. We here provide a working definition of Big Data predicated on the so-called "three V's": variety, volume, and velocity. From this definition, we argue that Big Data has evolutionary and revolutionary implications for identifying and intervening on the determinants of population health. We suggest that as more sources of diverse data become publicly available, the ability to combine and refine these data to yield valid answers to epidemiologic questions will be invaluable.
We conclude that while epidemiology as practiced today will continue to be practiced in the Big Data future, a component of our field's future value lies in integrating subject matter knowledge with increased technical savvy. Our training programs and our visions for future public health interventions should reflect this future.},
  author = {Mooney, S. J. and Westreich, D. J. and El-Sayed, A. M.},
  doi = {10.1097/EDE.0000000000000274},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Mooney, Westreich, El-Sayed - 2015 - Commentary Epidemiology in the era of big data.pdf:pdf},
  journal = {Epidemiology},
  keywords = {big data},
  mendeley-tags = {big data},
  number = {3},
  title = {{Epidemiology in the era of big data}},
  volume = {26},
  year = {2015}
}

@incollection{Pyne2015,
  abstract = {There is growing concern about our preparedness for controlling the spread of pandemics such as H1N1 Influenza. The dynamics of epidemic spread in large-scale populations are very complex. Further, human behavior, social contact networks, and pandemics are closely intertwined and evolve as the epidemic spread. Individuals' changing behaviors in response to public policies and their evolving perception of how an infectious disease outbreak is unfolding can dramatically alter normal social interactions. Effective planning and response strategies must take these complicated interactions into account. Mathematical models are key to understanding the spread of epidemics. In this chapter, we discuss a recent approach of diffusion in network models for studying the complex dynamics of epidemics in large-scale populations. Analyzing these models leads to very challenging computational problems. Further, using these models for forecasting epidemic spread and developing public health policies leads to issues that are characteristic of big data applications. The chapter describes the state of the art in computational and big data epidemiology.
{\textcopyright} 2015 Elsevier B.V.},
  author = {Pyne, Saumyadipta and Vullikanti, Anile Kumar S. and Marathe, Madhav V.},
  booktitle = {Handbook of Statistics},
  doi = {10.1016/B978-0-444-63492-4.00008-3},
  isbn = {9780444634924},
  pages = {171--202},
  publisher = {Elsevier},
  title = {{Big Data Applications in Health Sciences and Epidemiology}},
  url = {http://linkinghub.elsevier.com/retrieve/pii/B9780444634924000083},
  volume = {33},
  year = {2015}
}

@article{Roger2015,
  author = {Roger, V. L.},
  doi = {10.1161/CIRCOUTCOMES.115.002115},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Roger - 2015 - Of the Importance of Motherhood and Apple Pie What Big Data Can Learn from Small Data.pdf:pdf},
  journal = {Circulation: Cardiovascular Quality and Outcomes},
  number = {4},
  title = {{Of the Importance of Motherhood and Apple Pie: What Big Data Can Learn from Small Data}},
  volume = {8},
  year = {2015}
}

@article{Iwashyna2014,
  abstract = {Copyright {\textcopyright} 2014 by the American Thoracic Society. The Big Data movement in computer science has brought dramatic changes in what counts as data, how those data are analyzed, and what can be done with those data. Although increasingly pervasive in the business world, it has only recently begun to influence clinical research and practice. As Big Data draws from different intellectual traditions than clinical epidemiology, the ideas may be less familiar to practicing clinicians. There is an increasing role of Big Data in health care, and it has tremendous potential. This Demystifying Data Seminar identifies four main strands in Big Data relevant to health care. The first is the inclusion of many new kinds of data elements into clinical research and operations, in a volume not previously routinely used. Second, Big Data asks different kinds of questions of data and emphasizes the usefulness of analyses that are explicitly associational but not causal.
Third, Big Data brings new analytic approaches to bear on these questions. And fourth, Big Data embodies a new set of aspirations for a breaking down of distinctions between research data and operational data and their merging into a continuously learning health system.}, author = {Iwashyna, T. J. and Liu, V.}, doi = {10.1513/AnnalsATS.201405-185AS}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Iwashyna, Liu - 2014 - What's so different about big data A primer for clinicians trained to think epidemiologically.pdf:pdf}, journal = {Annals of the American Thoracic Society}, keywords = {big data}, mendeley-tags = {big data}, number = {7}, title = {{What's so different about big data?: A primer for clinicians trained to think epidemiologically}}, volume = {11}, year = {2014} } @article{Vayena2015, author = {Vayena, E. and Salath{\'{e}}, M. and Madoff, L. C. and Brownstein, J. S.}, doi = {10.1371/journal.pcbi.1003904}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Vayena et al. - 2015 - Ethical Challenges of Big Data in Public Health.pdf:pdf}, journal = {PLoS Computational Biology}, keywords = {ethics,webcrawl}, mendeley-tags = {ethics,webcrawl}, number = {2}, title = {{Ethical Challenges of Big Data in Public Health}}, volume = {11}, year = {2015} } @inproceedings{Liang2014, abstract = {{\textcopyright} 2014 IEEE.This research focuses on learning causal relationships on epidemiological data. We introduce the research need for causal reasoning and address one of the big data problems in epidemiology by showing the complexity of causal discovery and analysis in an observational epidemiological dataset. We also provide several computational methods of solving the problems including building a framework of causal reasoning on epidemio-logical dataset, improved algorithms for local causal discoveries, and the conceptual design of subgraph decompositions. 
This research further discusses how these approaches we have made are related to epidemiology. Through this research, we are able to more efficiently and effectively discover and analyze causal relationships in a big dataset of epidemiology.}, author = {Liang, Yiheng and Mikler, Armin R.}, booktitle = {2014 IEEE International Conference on Big Data (Big Data)}, doi = {10.1109/BigData.2014.7004421}, isbn = {978-1-4799-5666-1}, month = oct, pages = {11--18}, publisher = {IEEE}, title = {{Big data problems on discovering and analyzing causal relationships in epidemiological data}}, url = {http://ieeexplore.ieee.org/document/7004421/}, year = {2014} } @inproceedings{Nobles2015, abstract = {{\textcopyright} 2015 IEEE.Currently, a large amount of data is amassed in electronic health records (EHRs). However, EHR systems are largely information silos, that is, uses of these systems are often confined to management of patient information and analytics specific to a clinician's practice. A growing trend in healthcare is combining multiple databases to support epidemiological research. The College Health Surveillance Network is the first national data warehouse containing EHR data from 31 different student health centers. Each member university contributes to the data warehouse by uploading select EHR data including patient demographics, diagnoses, and procedures to a common server on a monthly basis. In this paper, we focus on the data quality dimensions from a subsample of the data comprised of over 5.7 million patient visits for approximately 980,000 patients with 4,465 unique diagnoses from 23 of those universities. We examine the data for measures of completeness, consistency, and availability for secondary use for epidemiological research. Additionally, clinical documentation practices and EHR vendor were evaluated as potential contributors to data quality. 
We found that overall about 70{\%} of the data in the data warehouse is available for secondary use, and identified clinical documentation practices that are correlated to a reduction in data quality. This suggests that automated quality control and proactive clinical documentation support could reduce ad-hoc data cleaning needs resulting in greater data availability for secondary use.}, author = {Nobles, Alicia L. and Vilankar, Ketki and Wu, Hao and Barnes, Laura E.}, booktitle = {2015 IEEE International Conference on Big Data (Big Data)}, doi = {10.1109/BigData.2015.7364060}, isbn = {978-1-4799-9926-2}, month = oct, pages = {2612--2620}, publisher = {IEEE}, title = {{Evaluation of data quality of multisite electronic health record data for secondary analysis}}, url = {http://ieeexplore.ieee.org/document/7364060/}, year = {2015} } @article{Brownson2015, abstract = {{\textcopyright} 2015 Elsevier Inc.Purpose: To identify macro-level trends that are changing the needs of epidemiologic research and practice and to develop and disseminate a set of competencies and recommendations for epidemiologic training that will be responsive to these changing needs. Methods: There were three stages to the project: (1) assembling of a working group of senior epidemiologists from multiple sectors, (2) identifying relevant literature, and (3) conducting key informant interviews with 15 experienced epidemiologists. Results: Twelve macro trends were identified along with associated actions for the field and educational competencies. 
The macro trends include the following: (1) "Big Data" or informatics, (2) the changing health communication environment, (3) the Affordable Care Act or health care system reform, (4) shifting demographics, (5) globalization, (6) emerging high-throughput technologies (omics), (7) a greater focus on accountability, (8) privacy changes, (9) a greater focus on "upstream" causes of disease, (10) the emergence of translational sciences, (11) the growing centrality of team and transdisciplinary science, and (12) the evolving funding environment. Conclusions: Addressing these issues through curricular change is needed to allow the field of epidemiology to more fully reach and sustain its full potential to benefit population health and remain a scientific discipline that makes critical contributions toward ensuring clinical, social, and population health.}, author = {Brownson, R. C. and Samet, J. M. and Chavez, G. F. and Davies, M. M. and Galea, S. and Hiatt, R. A. and Hornung, C. A. and Khoury, M. J. and Koo, D. and Mays, V. M. and Remington, P. and Yarber, L.}, doi = {10.1016/j.annepidem.2015.03.002}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Brownson et al. - 2015 - Charting a future for epidemiologic training.pdf:pdf}, journal = {Annals of Epidemiology}, number = {6}, title = {{Charting a future for epidemiologic training}}, volume = {25}, year = {2015} } @inproceedings{Boman2015, abstract = {{\textcopyright} 2014 IEEE.Resting on our experience of computational epidemiology in practice and of industrial projects on analytics of complex networks, we point to an innovation opportunity for improving the digital services to epidemiologists for monitoring, modeling, and mitigating the effects of communicable disease. 
Artificial intelligence and intelligent analytics of syndromic surveillance data promise new insights to epidemiologists, but the real value can only be realized if human assessments are paired with assessments made by machines. Neither massive data itself, nor careful analytics will necessarily lead to better informed decisions. The process producing feedback to humans on decision making informed by machines can be reversed to consider feedback to machines on decision making informed by humans, enabling learning machines. We predict and argue for the fact that the sensemaking that such machines can perform in tandem with humans can be of immense value to epidemiologists in the future.}, author = {Boman, M. and Gillblad, D.}, booktitle = {Proceedings - 2014 IEEE International Conference on Big Data, IEEE Big Data 2014}, doi = {10.1109/BigData.2014.7004419}, isbn = {9781479956654}, title = {{Learning machines for computational epidemiology}}, year = {2015} } @inproceedings{Romano2016, abstract = {{\textcopyright} 2016 IEEE.Many studies have indicated the potential of using Social Networks for the early detection of public health events, such as epidemic outbreaks, so that a faster response can take place. Anyhow, the most of these studies are focused on one or two diseases, and consequently to date it is not clear if and how different outbreaks give rise to different temporal dynamics of the messages. Furthermore, it is not clear if it is possible to define a single generic Data Mining solution for the detection of epidemic outbreaks from this Big Data, or if specifically tailored approaches should be implemented for each disease. To get an insight on this issue, we collected a massive dataset of Twitter messages to extract relevant information regarding different outbreaks from different countries in 2011. The manual analysis we conducted allowed us to define some macro-classes of diseases. 
Results show that there is a considerable variability in the temporal dynamics of Twitter messages from different diseases, and that the identification of a suitable source of information, to define a ground truth suitable for the assessment of time series analysis algorithms, is a challenging task. Finally we also report on a special case we found, highlighting that a lot of research has still to be done in this field.}, author = {Romano, Sara and Di Martino, Sergio and Kanhabua, Nattiya and Mazzeo, Antonino and Nejdl, Wolfgang}, booktitle = {2016 30th International Conference on Advanced Information Networking and Applications Workshops (WAINA)}, doi = {10.1109/WAINA.2016.111}, isbn = {978-1-5090-2461-2}, month = mar, pages = {69--74}, publisher = {IEEE}, title = {{Challenges in Detecting Epidemic Outbreaks from Social Networks}}, url = {http://ieeexplore.ieee.org/document/7471175/}, year = {2016} } @inproceedings{Curran2016, abstract = {Epidemics are a serious public health challenge, with epidemiologists and health analysts constantly trying to find more succinct ways to predict, and then prevent or minimize their impact. An important problem facing health systems is ensuring they are prepared for severe epidemics. Being able to predict an epidemic is only one part of the problem: resources need to be monitored in order to ensure their availability in the event of severe epidemics. Using System Dynamic modelling, health analysts can predict epidemics to a certain extent using previous infection dynamics, however mitigation strategies would be improved dramatically if the prediction was in real-Time, utilizing the full potential of information from a range of sources: participatory surveillance systems, sentinel data from General Practitioners (GPs) etc. 
Using these techniques alongside Surge Capacity modelling allows the monitoring of resources for all areas of the health system, equipment levels, staff levels, and bed availability etc., ensuring better preparedness. This paper introduces a way to bring these concepts together, and highlights future work which will expand on these ideas allowing for the possible reallocation of resources in the event of shortage in some areas, and spare capacity in others.}, author = {Curran, M. and Howley, E. and Duggan, J.}, booktitle = {DH 2016 - Proceedings of the 2016 Digital Health Conference}, doi = {10.1145/2896338.2896354}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Curran, Howley, Duggan - 2016 - An analytics framework to support surge capacity planning for emerging epidemics.pdf:pdf}, isbn = {9781450342247}, keywords = {data driven,modeling,syndromic surveillance}, mendeley-tags = {data driven,modeling,syndromic surveillance}, title = {{An analytics framework to support surge capacity planning for emerging epidemics}}, year = {2016} } @article{Huang2016, abstract = {{\textcopyright} 2016 Huang et al.The estimation of disease prevalence in online search engine data (e.g., Google Flu Trends (GFT)) has received a considerable amount of scholarly and public attention in recent years. While the utility of search engine data for disease surveillance has been demonstrated, the scientific community still seeks ways to identify and reduce biases that are embedded in search engine data. The primary goal of this study is to explore new ways of improving the accuracy of disease prevalence estimations by combining traditional disease data with search engine data. A novel method, Biased Sentinel Hospital-based Area Disease Estimation (B-SHADE), is introduced to reduce search engine data bias from a geographical perspective. 
To monitor search trends on Hand, Foot and Mouth Disease (HFMD) in Guangdong Province, China, we tested our approach by selecting 11 keywords from the Baidu index platform, a Chinese big data analyst similar to GFT. The correlation between the number of real cases and the composite index was 0.8. After decomposing the composite index at the city level, we found that only 10 cities presented a correlation of close to 0.8 or higher. These cities were found to be more stable with respect to search volume, and they were selected as sample cities in order to estimate the search volume of the entire province. After the estimation, the correlation improved from 0.8 to 0.864. After fitting the revised search volume with historical cases, the mean absolute error was 11.19{\%} lower than it was when the original search volume and historical cases were combined. To our knowledge, this is the first study to reduce search engine data bias levels through the use of rigorous spatial sampling strategies.}, author = {Huang, D.-C. and Wang, J.-F. and Huang, J.-X. and Sui, D. Z. and Zhang, H.-Y. and Hu, M.-G. and Xu, C.-D.}, doi = {10.1371/journal.pcbi.1004876}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Huang et al. - 2016 - Towards Identifying and Reducing the Bias of Disease Information Extracted from Search Engine Data.pdf:pdf}, journal = {PLoS Computational Biology}, keywords = {webcrawl}, mendeley-tags = {webcrawl}, number = {6}, title = {{Towards Identifying and Reducing the Bias of Disease Information Extracted from Search Engine Data}}, volume = {12}, year = {2016} } @article{Leyens2017, abstract = {{\textcopyright} 2016 WILEY PERIODICALS, INC.The use of data analytics across the entire healthcare value chain, from drug discovery and development through epidemiology to informed clinical decision for patients or policy making for public health, has seen an explosion in the recent years. 
The increase in quantity and variety of data available together with the improvement of storing capabilities and analytical tools offer numerous possibilities to all stakeholders (manufacturers, regulators, payers, healthcare providers, decision makers, researchers) but most importantly, it has the potential to improve general health outcomes if we learn how to exploit it in the right way. This article looks at the different sources of data and the importance of unstructured data. It goes on to summarize current and potential future uses in drug discovery, development, and monitoring as well as in public and personal healthcare; including examples of good practice and recent developments. Finally, we discuss the main practical and ethical challenges to unravel the full potential of big data in healthcare and conclude that all stakeholders need to work together towards the common goal of making sense of the available data for the common good.}, author = {Leyens, Lada and Reumann, Matthias and Malats, Nuria and Brand, Angela}, doi = {10.1002/gepi.22012}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Leyens et al. - 2017 - Use of big data for drug development and for public and personal health and care.pdf:pdf}, issn = {07410395}, journal = {Genetic Epidemiology}, keywords = {big data}, mendeley-tags = {big data}, month = jan, number = {1}, pages = {51--60}, title = {{Use of big data for drug development and for public and personal health and care}}, url = {http://doi.wiley.com/10.1002/gepi.22012}, volume = {41}, year = {2017} } @inproceedings{Roberts2016, abstract = {{\textcopyright} 2016 IEEE.Communicating statistical uncertainty to non-expert users is essential to translating data driven insights to create impact in the 'real world'. 
Embedding uncertainty in data visualizations however, can be a significant design challenge due when communicating to non-expert decision makers, and has been avoided in the past due to fear of overwhelming or confusing the audience. This research aims to explore interactive disease mapping features that enable the user to explore the data and reveal the uncertainty within the information presented. Understanding uncertainty enables the user to be aware of the limitations of data driven insights, and leads to more informed decision making processes.}, author = {Roberts, Jessie and Gough, Phillip}, booktitle = {2016 Big Data Visual Analytics (BDVA)}, doi = {10.1109/BDVA.2016.7787045}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Roberts, Gough - 2016 - Communicating Statistical Uncertainty to Non-Expert Audiences Interactive Disease Mapping.pdf:pdf}, isbn = {978-1-5090-5272-1}, keywords = {data visualization}, mendeley-tags = {data visualization}, month = nov, pages = {1--3}, publisher = {IEEE}, title = {{Communicating Statistical Uncertainty to Non-Expert Audiences: Interactive Disease Mapping}}, url = {http://ieeexplore.ieee.org/document/7787045/}, year = {2016} } @article{Wesolowski2016, abstract = {{\textcopyright} The Author 2016.Human travel can shape infectious disease dynamics by introducing pathogens into susceptible populations or by changing the frequency of contacts between infected and susceptible individuals. Quantifying infectious disease-relevant travel patterns on fine spatial and temporal scales has historically been limited by data availability. The recent emergence of mobile phone calling data and associated locational information means that we can now trace fine scale movement across large numbers of individuals. 
However, these data necessarily reflect a biased sample of individuals across communities and are generally aggregated for both ethical and pragmatic reasons that may further obscure the nuance of individual and spatial heterogeneities. Additionally, as a general rule, the mobile phone data are not linked to demographic or social identifiers, or to information about the disease status of individual subscribers (although these may be made available in smaller-scale specific cases). Combining data on human movement from mobile phone data-derived population fluxes with data on disease incidence requires approaches that can tackle varying spatial and temporal resolutions of each data source and generate inference about dynamics on scales relevant to both pathogen biology and human ecology. Here, we review the opportunities and challenges of these novel data streams, illustrating our examples with analyses of 2 different pathogens in Kenya, and conclude by outlining core directions for future research.}, author = {Wesolowski, Amy and Buckee, Caroline O. and Eng{\o}-Monsen, Kenth and Metcalf, C. J. E.}, doi = {10.1093/infdis/jiw273}, issn = {0022-1899}, journal = {Journal of Infectious Diseases}, month = dec, number = {suppl 4}, pages = {S414--S420}, title = {{Connecting Mobility to Infectious Diseases: The Promise and Limits of Mobile Phone Data}}, url = {https://academic.oup.com/jid/article-lookup/doi/10.1093/infdis/jiw273}, volume = {214}, year = {2016} } @article{Young2017, abstract = {Copyright {\textcopyright} 2017 Wolters Kluwer Health, Inc. All rights reserved.Introduction: "Social big data" from technologies such as social media, wearable devices, and online searches continue to grow and can be used as tools for HIV research. Although researchers can uncover patterns and insights associated with HIV trends and transmission, the review process is time consuming and resource intensive. 
Machine learning methods derived from computer science might be used to assist HIV domain experts by learning how to rapidly and accurately identify patterns associated with HIV from a large set of social data. Methods: Using an existing social media data set that was associated with HIV and coded by an HIV domain expert, we tested whether 4 commonly used machine learning methods could learn the patterns associated with HIV risk behavior. We used the 10-fold cross-validation method to examine the speed and accuracy of these models in applying that knowledge to detect HIV content in social media data. Results and Discussion: Logistic regression and random forest resulted in the highest accuracy in detecting HIV-related social data (85.3{\%}), whereas the Ridge Regression Classifier resulted in the lowest accuracy. Logistic regression yielded the fastest processing time (16.98 seconds). Conclusions: Machine learning can enable social big data to become a new and important tool in HIV research, helping to create a new field of "digital HIV epidemiology." If a domain expert can identify patterns in social data associated with HIV risk or HIV transmission, machine learning models could quickly and accurately learn those associations and identify potential HIV patterns in large social data sets.}, author = {Young, S. D. and Yu, W. 
and Wang, W.}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Young, Yu, Wang - 2017 - Toward automating HIV identification Machine learning for rapid identification of HIV-related social media data.pdf:pdf}, journal = {Journal of Acquired Immune Deficiency Syndromes}, keywords = {classification}, mendeley-tags = {classification}, title = {{Toward automating HIV identification: Machine learning for rapid identification of HIV-related social media data}}, volume = {74}, year = {2017} } @inproceedings{Boonchieng2016, abstract = {{\textcopyright} 2016 IEEE.Health informatics is a new research area which is interdisciplinary amongst information science, computer science and healthcare. The concept of health informatics is to develop a new way to manipulate healthcare data from various resources and devices by optimizing the method of data acquisition, data storage, data processing, and data visualization. Community health informatics can be described as the systematic application of information and computer science to obtain valuable data for solving health problems and providing it to health policy makers. The challenge of community health informatics is to maximize the efficiency and efficacy of big data analysis. This discussion paper aims to present the various applications of machine learning and software engineering approaches that implemented in digital disease detection.}, author = {Boonchieng, E. 
and Duangchaemkarn, K.}, booktitle = {2016 13th International Joint Conference on Computer Science and Software Engineering, JCSSE 2016}, doi = {10.1109/JCSSE.2016.7748841}, isbn = {9781509020331}, title = {{Digital disease detection: Application of machine learning in community health informatics}}, year = {2016} } @article{Dewdney2017, abstract = {{\textcopyright} 2017 Dewdney and Lachance.Despite many perceived advances in treatment over the past few decades, cancer continues to present a significant health burden, particularly to the aging US population. Forces including shrinking funding mechanisms, cost and quality concerns, as well as disappointing clinical outcomes have driven a surge of recent efforts into utilizing the technological innovation that has permeated other industries by leveraging large and complex data sets, so called "big data." In this review, we will review some of the history of oncology data collection, including the earliest data registries, as well as explore the future directions of this new brand of research while highlighting some of the more recent and promising efforts to harness the power of the electronic health record and the multitude of data co-located there, in an effort to improve individualized cancer-related outcomes in rapid real time.}, author = {Dewdney, S. B. 
and Lachance, J.}, doi = {10.3389/fonc.2016.00268}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Dewdney, Lachance - 2017 - Electronic records, registries, and the development of big data Crowd-sourcing quality toward knowledge.pdf:pdf}, journal = {Frontiers in Oncology}, month = jan, title = {{Electronic records, registries, and the development of "big data": Crowd-sourcing quality toward knowledge}}, volume = {6}, year = {2017} } @article{Hamada2017, abstract = {{\textcopyright} 2016, Japanese Society of Gastroenterology.Molecular pathological epidemiology (MPE) is an integrative field that utilizes molecular pathology to incorporate interpersonal heterogeneity of a disease process into epidemiology. In each individual, the development and progression of a disease are determined by a unique combination of exogenous and endogenous factors, resulting in different molecular and pathological subtypes of the disease. Based on “the unique disease principle,” the primary aim of MPE is to uncover an interactive relationship between a specific environmental exposure and disease subtypes in determining disease incidence and mortality. This MPE approach can provide etiologic and pathogenic insights, potentially contributing to precision medicine for personalized prevention and treatment. Although breast, prostate, lung, and colorectal cancers have been among the most commonly studied diseases, the MPE approach can be used to study any disease. In addition to molecular features, host immune status and microbiome profile likely affect a disease process, and thus serve as informative biomarkers. As such, further integration of several disciplines into MPE has been achieved (e.g., pharmaco-MPE, immuno-MPE, and microbial MPE), to provide novel insights into underlying etiologic mechanisms. With the advent of high-throughput sequencing technologies, available genomic and epigenomic data have expanded dramatically. 
The MPE approach can also provide a specific risk estimate for each disease subgroup, thereby enhancing the impact of genome-wide association studies on public health. In this article, we present recent progress of MPE, and discuss the importance of accounting for the disease heterogeneity in the era of big-data health science and precision medicine.}, author = {Hamada, T. and Keum, N. N. and Nishihara, R. and Ogino, S.}, doi = {10.1007/s00535-016-1272-3}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Hamada et al. - 2017 - Molecular pathological epidemiology new developing frontiers of big data science to study etiologies and pathogen.pdf:pdf}, journal = {Journal of Gastroenterology}, number = {3}, title = {{Molecular pathological epidemiology: new developing frontiers of big data science to study etiologies and pathogenesis}}, volume = {52}, year = {2017} } @article{Kalyanam2017, abstract = {{\textcopyright} 2016 Elsevier LtdIntroduction Nonmedical use of prescription medications/drugs (NMUPD) is a serious public health threat, particularly in relation to the prescription opioid analgesics abuse epidemic. While attention to this problem has been growing, there remains an urgent need to develop novel strategies in the field of “digital epidemiology” to better identify, analyze and understand trends in NMUPD behavior. Methods We conducted surveillance of the popular microblogging site Twitter by collecting 11 million tweets filtered for three commonly abused prescription opioid analgesic drugs Percocet{\textregistered} (acetaminophen/oxycodone), OxyContin{\textregistered} (oxycodone), and Oxycodone. Unsupervised machine learning was applied on the subset of tweets for each analgesic drug to discover underlying latent themes regarding risk behavior. A two-step process of obtaining themes, and filtering out unwanted tweets was carried out in three subsequent rounds of machine learning. 
Results Using this methodology, 2.3M tweets were identified that contained content relevant to analgesic NMUPD. The underlying themes were identified for each drug and the most representative tweets of each theme were annotated for NMUPD behavioral risk factors. The primary themes identified evidence high levels of social media discussion about polydrug abuse on Twitter. This included specific mention of various polydrug combinations including use of other classes of prescription drugs, and illicit drug abuse. Conclusions This study presents a methodology to filter Twitter content for NMUPD behavior, while also identifying underlying themes with minimal human intervention. Results from the study track accurately with the inclusion/exclusion criteria used to isolate NMUPD-related risk behaviors of interest and also provides insight on NMUPD behavior that has a high level of social media engagement. Results suggest that this could be a viable methodology for use in big data substance abuse surveillance, data collection, and analysis in comparison to other studies that rely upon content analysis and human coding schemes.}, author = {Kalyanam, J. and Katsuki, T. and Lanckriet, G. R. G. and Mackey, T. K.}, doi = {10.1016/j.addbeh.2016.08.019}, file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Kalyanam et al. - 2017 - Exploring trends of nonmedical use of prescription drugs and polydrug abuse in the Twittersphere using unsuperv.pdf:pdf}, journal = {Addictive Behaviors}, keywords = {social media}, mendeley-tags = {social media}, title = {{Exploring trends of nonmedical use of prescription drugs and polydrug abuse in the Twittersphere using unsupervised machine learning}}, volume = {65}, year = {2017} } @inproceedings{Shah2016, abstract = {{\textcopyright} 2016 IEEE.Planning for large-scale epidemiological outbreaks in livestock populations often involves executing compute-intensive disease spread simulations. 
To capture the probabilities of various outcomes, these simulations are executed several times over a collection of representative input scenarios, producing voluminous data. The resulting datasets contain valuable insights, including sequences of events that lead to extreme outbreaks. However, discovering and leveraging such information is also computationally expensive. In this study, we propose a distributed approach for analyzing voluminous epidemiology data to locate and classify the most influential entities in a disease outbreak. Using our disease transmission network (DTN), planners or analysts can isolate entities that have a disproportionate effect on epidemiological outcomes, enabling effective allocation of limited resources such as vaccinations and field personnel. We use a representative dataset to verify our approach, including identification of influential entities and creation of machine learning models for accurate classifications that generalize to other datasets.}, author = {Shah, Naman and Shah, Harshil and Malensek, Matthew and Pallickara, Sangmi Lee and Pallickara, Shrideep}, booktitle = {2016 IEEE International Conference on Big Data (Big Data)}, doi = {10.1109/BigData.2016.7840726}, isbn = {978-1-4673-9005-7}, month = dec, pages = {1222--1231}, publisher = {IEEE}, title = {{Network analysis for identifying and characterizing disease outbreak influence from voluminous epidemiology data}}, url = {http://ieeexplore.ieee.org/document/7840726/}, year = {2016} } @article{Moustakas2017, abstract = {{\textcopyright} 2017, Springer-Verlag Berlin Heidelberg.Understanding the spread of any disease is a highly complex and interdisciplinary exercise as biological, social, geographic, economic, and medical factors may shape the way a disease moves through a population and options for its eventual control or eradication. 
Disease spread poses a serious threat in animal and plant health and has implications for ecosystem functioning and species extinctions as well as implications in society through food security and potential disease spread in humans. Space–time epidemiology is based on the concept that various characteristics of the pathogenic agents and the environment interact in order to alter the probability of disease occurrence and form temporal or spatial patterns. Epidemiology aims to identify these patterns and factors, to assess the relevant uncertainty sources, and to describe disease in the population. Thus disease spread at the population level differs from the approach traditionally taken by veterinary practitioners that are principally concerned with the health status of the individual. Patterns of disease occurrence provide insights into which factors may be affecting the health of the population, through investigating which individuals are affected, where are these individuals located and when did they become infected. With the rapid development of smart sensors, social networks, as well as digital maps and remotely-sensed imagery spatio-temporal data are more ubiquitous and richer than ever before. The availability of such large datasets (big data) poses great challenges in data analysis. In addition, increased availability of computing power facilitates the use of computationally-intensive methods for the analysis of such data. Thus new methods as well as case studies are needed to understand veterinary and ecological epidemiology. 
A special issue aimed to address this topic.},
  author = {Moustakas, Aristides},
  doi = {10.1007/s00477-016-1374-8},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Moustakas - 2017 - Spatio-temporal data mining in ecological and veterinary epidemiology.pdf:pdf},
  issn = {1436-3240},
  journal = {Stochastic Environmental Research and Risk Assessment},
  keywords = {spatial},
  mendeley-tags = {spatial},
  month = may,
  number = {4},
  pages = {829--834},
  title = {Spatio-temporal data mining in ecological and veterinary epidemiology},
  url = {http://link.springer.com/10.1007/s00477-016-1374-8},
  volume = {31},
  year = {2017},
}

% NOTE(review): the entry below has no `pages` field in this export; add it
% from the publisher record (see its DOI) before citing.
@article{Brownson2017,
  abstract = {{\textcopyright} 2016 Elsevier Inc. To extend the reach and relevance of epidemiology for public health practice, the science needs be broadened beyond etiologic research, to link more strongly with emerging technologies and to acknowledge key societal transformations. This new focus for epidemiology and its implications for epidemiologic training can be considered in the context of macro trends affecting society, including a greater focus on upstream causes of disease, shifting demographics, the Affordable Care Act and health care system reform, globalization, changing health communication environment, growing centrality of team and transdisciplinary science, emergence of translational sciences, greater focus on accountability, big data, informatics, high-throughput technologies (``omics''), privacy changes, and the evolving funding environment. This commentary describes existing approaches to and competencies for training in epidemiology, maps macro trends with competencies, highlights an example of competency-based education in the Epidemic Intelligence Service of Centers for Disease Control and Prevention, and suggests expanded and more dynamic training approaches. A reexamination of current approaches to epidemiologic training is needed.},
  author = {Brownson, R. C. and Samet, J. M. and Bensyl, D. M.},
  doi = {10.1016/j.annepidem.2016.12.002},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Brownson, Samet, Bensyl - 2017 - Applied epidemiology and public health are we training the future generations appropriately.pdf:pdf},
  journal = {Annals of Epidemiology},
  keywords = {biosurveillance},
  mendeley-tags = {biosurveillance},
  number = {2},
  title = {Applied epidemiology and public health: are we training the future generations appropriately?},
  volume = {27},
  year = {2017},
}

@article{Moustakas2017a,
  abstract = {{\textcopyright} 2016, Springer-Verlag Berlin Heidelberg. Bovine tuberculosis (TB) poses a serious threat for agricultural industry in several countries, it involves potential interactions between wildlife and cattle and creates societal problems in terms of human-wildlife conflict. This study addresses connectedness network analysis, the spatial, and temporal dynamics of TB between cattle in farms and the European badger (Meles meles) using a large dataset generated by a calibrated agent based model. Results showed that infected network connectedness was lower in badgers than in cattle. The contribution of an infected individual to the mean distance of disease spread over time was considerably lower for badger than cattle; badgers mainly spread the disease locally while cattle infected both locally and across longer distances. The majority of badger-induced infections occurred when individual badgers leave their home sett, and this was positively correlated with badger population growth rates. Point pattern analysis indicated aggregation in the spatial pattern of TB prevalence in badger setts across all scales. The spatial distribution of farms that were not TB free was aggregated at different scales than the spatial distribution of infected badgers and became random at larger scales.
The spatial cross correlation between infected badger setts and infected farms revealed that generally infected setts and farms do not coexist except at few scales. Temporal autocorrelation detected a two year infection cycle for badgers, while there was both within the year and longer cycles for infected cattle. Temporal cross correlation indicated that infection cycles in badgers and cattle are negatively correlated. The implications of these results for understanding the dynamics of the disease are discussed.},
  author = {Moustakas, Aristides and Evans, Matthew R.},
  doi = {10.1007/s00477-016-1311-x},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Moustakas, Evans - 2017 - A big-data spatial, temporal and network analysis of bovine tuberculosis between wildlife (badgers) and cattle.pdf:pdf},
  issn = {1436-3240},
  journal = {Stochastic Environmental Research and Risk Assessment},
  month = feb,
  number = {2},
  pages = {315--328},
  title = {A big-data spatial, temporal and network analysis of bovine tuberculosis between wildlife (badgers) and cattle},
  url = {http://link.springer.com/10.1007/s00477-016-1311-x},
  volume = {31},
  year = {2017},
}

% NOTE(review): the entry below has no `pages` field in this export; add it
% from the publisher record (see its DOI) before citing.
@article{Gogoshin2017,
  abstract = {{\textcopyright} 2017 Mary Ann Liebert, Inc. Bayesian network (BN) reconstruction is a prototypical systems biology data analysis approach that has been successfully used to reverse engineer and model networks reflecting different layers of biological organization (ranging from genetic to epigenetic to cellular pathway to metabolomic). It is especially relevant in the context of modern (ongoing and prospective) studies that generate heterogeneous high-throughput omics datasets.
However, there are both theoretical and practical obstacles to the seamless application of BN modeling to such big data, including computational inefficiency of optimal BN structure search algorithms, ambiguity in data discretization, mixing data types, imputation and validation, and, in general, limited scalability in both reconstruction and visualization of BNs. To overcome these and other obstacles, we present BNOmics, an improved algorithm and software toolkit for inferring and analyzing BNs from omics datasets. BNOmics aims at comprehensive systems biology-type data exploration, including both generating new biological hypothesis and testing and validating the existing ones. Novel aspects of the algorithm center around increasing scalability and applicability to varying data types (with different explicit and implicit distributional assumptions) within the same analysis framework. An output and visualization interface to widely available graph-rendering software is also included. Three diverse applications are detailed. BNOmics was originally developed in the context of genetic epidemiology data and is being continuously optimized to keep pace with the ever-increasing inflow of available large-scale omics datasets. As such, the software scalability and usability on the less than exotic computer hardware are a priority, as well as the applicability of the algorithm and software to the heterogeneous datasets containing many data types-single-nucleotide polymorphisms and other genetic/epigenetic/transcriptome variables, metabolite levels, epidemiological variables, endpoints, and phenotypes, etc.},
  author = {Gogoshin, G. and Boerwinkle, E. and Rodin, A. S.},
  doi = {10.1089/cmb.2016.0100},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Gogoshin, Boerwinkle, Rodin - 2017 - New Algorithm and Software (BNOmics) for Inferring and Visualizing Bayesian Networks from Heterogen.pdf:pdf},
  journal = {Journal of Computational Biology},
  keywords = {Bayesian networks},
  mendeley-tags = {Bayesian networks},
  number = {4},
  title = {New Algorithm and Software ({BNOmics}) for Inferring and Visualizing {Bayesian} Networks from Heterogeneous Big Biological and Genetic Data},
  volume = {24},
  year = {2017},
}

% NOTE(review): the entry below has no `pages` field in this export; add it
% from the publisher record (see its DOI) before citing.
@article{Gu2017,
  abstract = {{\textcopyright} 2016 Elsevier Ireland Ltd Background In recent years, the literature associated with healthcare big data has grown rapidly, but few studies have used bibliometrics and a visualization approach to conduct deep mining and reveal a panorama of the healthcare big data field. Methods To explore the foundational knowledge and research hotspots of big data research in the field of healthcare informatics, this study conducted a series of bibliometric analyses on the related literature, including papers' production trends in the field and the trend of each paper's co-author number, the distribution of core institutions and countries, the core literature distribution, the related information of prolific authors and innovation paths in the field, a keyword co-occurrence analysis, and research hotspots and trends for the future. Results By conducting a literature content analysis and structure analysis, we found the following: (a) In the early stage, researchers from the United States, the People's Republic of China, the United Kingdom, and Germany made the most contributions to the literature associated with healthcare big data research and the innovation path in this field.
(b) The innovation path in healthcare big data consists of three stages: the disease early detection, diagnosis, treatment, and prognosis phase, the life and health promotion phase, and the nursing phase. (c) Research hotspots are mainly concentrated in three dimensions: the disease dimension (e.g., epidemiology, breast cancer, obesity, and diabetes), the technical dimension (e.g., data mining and machine learning), and the health service dimension (e.g., customized service and elderly nursing). Conclusion This study will provide scholars in the healthcare informatics community with panoramic knowledge of healthcare big data research, as well as research hotspots and future research directions.},
  author = {Gu, D. and Li, J. and Li, X. and Liang, C.},
  doi = {10.1016/j.ijmedinf.2016.11.006},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Gu et al. - 2017 - Visualizing the knowledge structure and evolution of big data research in healthcare informatics.pdf:pdf},
  journal = {International Journal of Medical Informatics},
  keywords = {big data},
  mendeley-tags = {big data},
  title = {Visualizing the knowledge structure and evolution of big data research in healthcare informatics},
  volume = {98},
  year = {2017},
}

@article{Marvin2017,
  abstract = {Technology is now being developed that is able to handle vast amounts of structured and unstructured data from diverse sources and origins. These technologies are often referred to as big data, and open new areas of research and applications that will have an increasing impact in all sectors of our society. In this paper we assessed to which extent big data is being applied in the food safety domain and identified several promising trends. In several parts of the world, governments stimulate the publication on internet of all data generated in public funded research projects.
This policy opens new opportunities for stakeholders dealing with food safety to address issues which were not possible before. Application of mobile phones as detection devices for food safety and the use of social media as early warning of food safety problems are a few examples of the new developments that are possible due to big data.},
  author = {Marvin, Hans J. P. and Janssen, Esm{\'{e}}e M. and Bouzembrak, Yamine and Hendriksen, Peter J. M. and Staats, Martijn},
  doi = {10.1080/10408398.2016.1257481},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Marvin et al. - 2017 - Big data in food safety An overview.pdf:pdf},
  issn = {1040-8398},
  journal = {Critical Reviews in Food Science and Nutrition},
  keywords = {big data,database,food safety,new technologies},
  mendeley-tags = {big data},
  month = jul,
  number = {11},
  pages = {2286--2295},
  publisher = {Taylor {\&} Francis},
  title = {Big data in food safety: An overview},
  url = {https://www.tandfonline.com/doi/full/10.1080/10408398.2016.1257481},
  volume = {57},
  year = {2017},
}

% The title dash is written as `---`; {Harnessing} and {Web} are braced so
% sentence-casing styles keep their capitals.
@article{Brownstein2009,
  abstract = {John Brownstein, Clark Freifeld, and Lawrence Madoff write that a new generation of disease-surveillance ``mashups'' can mine, categorize, filter, and visualize online intelligence about epidemics in real time.},
  author = {Brownstein, John S. and Freifeld, Clark C. and Madoff, Lawrence C.},
  doi = {10.1056/NEJMp0900702},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Brownstein, Freifeld, Madoff - 2009 - Digital Disease Detection — Harnessing the Web for Public Health Surveillance.pdf:pdf},
  issn = {0028-4793},
  journal = {New England Journal of Medicine},
  keywords = {webcrawl},
  mendeley-tags = {webcrawl},
  month = may,
  number = {21},
  pages = {2153--2157},
  publisher = {Massachusetts Medical Society},
  title = {Digital Disease Detection --- {Harnessing} the {Web} for Public Health Surveillance},
  url = {http://www.nejm.org/doi/abs/10.1056/NEJMp0900702},
  volume = {360},
  year = {2009},
}

@article{Chan2010,
  abstract = {The increasing number of emerging infectious disease events that have spread internationally, such as severe acute respiratory syndrome (SARS) and the 2009 pandemic A/H1N1, highlight the need for improvements in global outbreak surveillance. It is expected that the proliferation of Internet-based reports has resulted in greater communication and improved surveillance and reporting frameworks, especially with the revision of the World Health Organization's (WHO) International Health Regulations (IHR 2005), which went into force in 2007. However, there has been no global quantitative assessment of whether and how outbreak detection and communication processes have actually changed over time. In this study, we analyzed the entire WHO public record of Disease Outbreak News reports from 1996 to 2009 to characterize spatial-temporal trends in the timeliness of outbreak discovery and public communication about the outbreak relative to the estimated outbreak start date. Cox proportional hazards regression analyses show that overall, the timeliness of outbreak discovery improved by 7.3{\%} [hazard ratio (HR) = 1.073, 95{\%} CI (1.038; 1.110)] per year, and public communication improved by 6.2{\%} [HR = 1.062, 95{\%} CI (1.028; 1.096)] per year.
However, the degree of improvement varied by geographic region; the only WHO region with statistically significant ($\alpha$ = 0.05) improvement in outbreak discovery was the Western Pacific region [HR = 1.102 per year, 95{\%} CI (1.008; 1.205)], whereas the Eastern Mediterranean [HR = 1.201 per year, 95{\%} CI (1.066; 1.353)] and Western Pacific regions [HR = 1.119 per year, 95{\%} CI (1.025; 1.221)] showed improvement in public communication. These findings provide quantitative historical assessment of timeliness in infectious disease detection and public reporting of outbreaks.},
  author = {Chan, E. H. and Brewer, T. F. and Madoff, L. C. and Pollack, M. P. and Sonricker, A. L. and Keller, M. and Freifeld, C. C. and Blench, M. and Mawudeku, A. and Brownstein, J. S.},
  doi = {10.1073/pnas.1006219107},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chan et al. - 2010 - Global capacity for emerging infectious disease detection.pdf:pdf},
  issn = {0027-8424},
  journal = {Proceedings of the National Academy of Sciences},
  keywords = {biosurveillance,reporting},
  mendeley-tags = {biosurveillance,reporting},
  month = dec,
  number = {50},
  pages = {21701--21706},
  pmcid = {PMC3003006},
  pmid = {21115835},
  title = {Global capacity for emerging infectious disease detection},
  url = {http://www.pnas.org/cgi/doi/10.1073/pnas.1006219107},
  volume = {107},
  year = {2010},
}

% No abstract was exported for the two entries below.
@article{Han2016,
  author = {Han, Barbara A. and Drake, John M.},
  doi = {10.15252/embr.201642534},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Han, Drake - 2016 - Future directions in analytics for infectious disease intelligence.pdf:pdf},
  issn = {1469-221X},
  journal = {EMBO reports},
  keywords = {big data,biosurveillance,ddds,emerging diseases},
  mendeley-tags = {big data,biosurveillance,ddds,emerging diseases},
  month = jun,
  number = {6},
  pages = {785--789},
  title = {Future directions in analytics for infectious disease intelligence},
  url = {http://embor.embopress.org/lookup/doi/10.15252/embr.201642534},
  volume = {17},
  year = {2016},
}

@article{Halevy2009,
  author = {Halevy, Alon and Norvig, Peter and Pereira, Fernando},
  doi = {10.1109/MIS.2009.36},
  file = {:C$\backslash$:/Users/fernanda.dorea/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Halevy, Norvig, Pereira - 2009 - The Unreasonable Effectiveness of Data.pdf:pdf},
  issn = {1541-1672},
  journal = {IEEE Intelligent Systems},
  keywords = {Semantic Web,machine learning,very large data bases},
  month = mar,
  number = {2},
  pages = {8--12},
  publisher = {IEEE Educational Activities Department},
  title = {The Unreasonable Effectiveness of Data},
  url = {http://ieeexplore.ieee.org/document/4804817/},
  volume = {24},
  year = {2009},
}