2023
Javier González, Cliff Wong, Zelalem Gero, Jass Bagga, Risa Ueno, Isabel Chien, Eduard Orakvin, Emre Kıcıman, Aditya Nori, Roshanthi Weerasinghe, Rom S Leidner, Brian Piening, Tristan Naumann, Carlo Bifulco, Hoifung Poon. TRIALSCOPE: A Unifying Causal Framework for Scaling Real-World Evidence Generation with Biomedical Language Models. Working paper, arXiv, 2023.
@workingpaper{gonzalez2023trialscope,
title = {TRIALSCOPE: A Unifying Causal Framework for Scaling Real-World Evidence Generation with Biomedical Language Models},
author = {Javier González and Cliff Wong and Zelalem Gero and Jass Bagga and Risa Ueno and Isabel Chien and Eduard Orakvin and Emre Kıcıman and Aditya Nori and Roshanthi Weerasinghe and Rom S Leidner and Brian Piening and Tristan Naumann and Carlo Bifulco and Hoifung Poon},
url = {https://arxiv.org/abs/2311.01301},
doi = {https://doi.org/10.48550/arXiv.2311.01301},
year = {2023},
date = {2023-11-02},
urldate = {2023-11-02},
abstract = {The rapid digitization of real-world data offers an unprecedented opportunity for optimizing healthcare delivery and accelerating biomedical discovery. In practice, however, such data is most abundantly available in unstructured forms, such as clinical notes in electronic medical records (EMRs), and it is generally plagued by confounders. In this paper, we present TRIALSCOPE, a unifying framework for distilling real-world evidence from population-level observational data. TRIALSCOPE leverages biomedical language models to structure clinical text at scale, employs advanced probabilistic modeling for denoising and imputation, and incorporates state-of-the-art causal inference techniques to combat common confounders. Using clinical trial specification as generic representation, TRIALSCOPE provides a turn-key solution to generate and reason with clinical hypotheses using observational data. In extensive experiments and analyses on a large-scale real-world dataset with over one million cancer patients from a large US healthcare network, we show that TRIALSCOPE can produce high-quality structuring of real-world data and generates comparable results to marquee cancer trials. In addition to facilitating in silico clinical trial design and optimization, TRIALSCOPE may be used to empower synthetic controls, pragmatic trials, post-market surveillance, as well as support fine-grained patient-like-me reasoning in precision diagnosis and treatment.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Iván Díaz, Hana Lee, Emre Kıcıman, Mouna Akacha, Dean Follman, Debashis Ghosh. Sensitivity analysis for causality in observational studies for regulatory science. Working paper, arXiv, 2023.
@workingpaper{Díaz2023,
title = {Sensitivity analysis for causality in observational studies for regulatory science},
author = {Iván Díaz and Hana Lee and Emre Kıcıman and Mouna Akacha and Dean Follman and Debashis Ghosh},
url = {https://arxiv.org/abs/2310.03176},
doi = {https://doi.org/10.48550/arXiv.2310.03176},
year = {2023},
date = {2023-10-04},
abstract = {Recognizing the importance of real-world data (RWD) for regulatory purposes, the United States (US) Congress passed the 21st Century Cures Act mandating the development of Food and Drug Administration (FDA) guidance on regulatory use of real-world evidence. The Forum on the Integration of Observational and Randomized Data (FIORD) conducted a meeting bringing together various stakeholder groups to build consensus around best practices for the use of RWD to support regulatory science. Our companion paper describes in detail the context and discussion carried out in the meeting, which includes a recommendation to use a causal roadmap for complete pre-specification of study designs using RWD. This article discusses one step of the roadmap: the specification of a procedure for sensitivity analysis, defined as a procedure for testing the robustness of substantive conclusions to violations of assumptions made in the causal roadmap. We include a worked-out example of a sensitivity analysis from a RWD study on the effectiveness of Nifurtimox in treating Chagas disease, as well as an overview of various methods available for sensitivity analysis in causal inference, emphasizing practical considerations on their use for regulatory purposes.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Kristina Gligoric, Arnaud Chiolero, Emre Kıcıman, Ryen W. White, Eric Horvitz, Robert West. Food Choice Mimicry on a Large University Campus. Working paper, 2023.
@workingpaper{gligoric2023mimicry,
title = {Food Choice Mimicry on a Large University Campus},
author = {Kristina Gligoric and Arnaud Chiolero and Emre Kıcıman and Ryen W. White and Eric Horvitz and Robert West},
url = {https://arxiv.org/abs/2308.16095},
doi = {https://doi.org/10.48550/arXiv.2308.16095},
year = {2023},
date = {2023-08-30},
abstract = {Social influence is a strong determinant of food consumption, which in turn influences health. Although consistent observations have been made on the role of social factors in driving similarities in food consumption, much less is known about the precise governing mechanisms. We study social influence on food choice through carefully designed causal analyses, leveraging the sequential nature of shop queues on a major university campus. In particular, we consider a large number of adjacent purchases where a focal user immediately follows another user ("partner") in the checkout queue and both make a purchase. Identifying the partner's impact on the focal user, we find strong evidence of a specific behavioral mechanism for how dietary similarities between individuals arise: purchasing mimicry, a phenomenon where the focal user copies the partner's purchases. For instance, across food additions purchased during lunchtime together with a meal, we find that the focal user is significantly more likely to purchase the food item when the partner buys the item, vs. when the partner does not, increasing the purchasing probability by 14% in absolute terms, or by 83% in relative terms. The effect is observed across all food types, but largest for condiments, and smallest for soft drinks. We find that no such effect is observed when a focal user is compared to a random (rather than directly preceding) partner. Furthermore, purchasing mimicry is present across age, gender, and status subpopulations, but strongest for students and the youngest persons. Finally, we find a dose-response relationship whereby mimicry decreases as proximity in the purchasing queue decreases. The results of this study elucidate the behavioral mechanism of purchasing mimicry and have further implications for understanding and improving dietary behaviors on campus.},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
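Note on the reported effect sizes: the 14-point absolute and 83% relative increases quoted in the abstract jointly imply a baseline purchase probability of roughly 17%. A minimal sketch of that arithmetic (the baseline is derived here, not reported in the paper):

```python
# Back-of-the-envelope check on the effect sizes quoted in the abstract.
# A 14-percentage-point absolute increase that equals an 83% relative
# increase implies a baseline purchase probability of about 17%.
absolute_gain = 0.14   # +14 points when the partner buys the item
relative_gain = 0.83   # +83% relative to the baseline probability

baseline = absolute_gain / relative_gain   # ~0.169
with_partner = baseline + absolute_gain    # ~0.309

print(f"implied baseline purchase probability: {baseline:.1%}")
print(f"implied probability when partner buys: {with_partner:.1%}")
```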
Somya Sharma, Swati Sharma, Licheng Liu, Rishabh Tushir, Andy Neal, Robert Ness, John Crawford, Emre Kıcıman, Ranveer Chandra. Knowledge Guided Representation Learning and Causal Structure Learning in Soil Science. Working paper, arXiv, 2023.
@workingpaper{sharma2023kgrcl,
title = {Knowledge Guided Representation Learning and Causal Structure Learning in Soil Science},
author = {Somya Sharma and Swati Sharma and Licheng Liu and Rishabh Tushir and Andy Neal and Robert Ness and John Crawford and Emre Kıcıman and Ranveer Chandra},
url = {https://arxiv.org/abs/2306.09302},
doi = {https://doi.org/10.48550/arXiv.2306.09302},
year = {2023},
date = {2023-06-15},
abstract = {An improved understanding of soil can enable more sustainable land-use practices. Nevertheless, soil is called a complex, living medium due to the complex interaction of different soil processes that limit our understanding of soil. Process-based models and analyzing observed data provide two avenues for improving our understanding of soil processes. Collecting observed data is cost-prohibitive but reflects real-world behavior, while process-based models can be used to generate ample synthetic data which may not be representative of reality. We propose a framework, knowledge-guided representation learning and causal structure learning (KGRCL), to accelerate scientific discoveries in soil science. The framework improves representation learning for simulated soil processes via conditional distribution matching with observed soil processes. Simultaneously, the framework leverages both observed and simulated data to learn a causal structure among the soil processes. The learned causal graph is more representative of ground truth than other graphs generated from other causal discovery methods. Furthermore, the learned causal graph is leveraged in a supervised learning setup to predict the impact of fertilizer use and changing weather on soil carbon. We present the results in five different locations to show the improvement in the prediction performance in out-of-sample and few-shot settings.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Lauren E Dang, Susan Gruber, Hana Lee, Issa Dahabreh, Elizabeth A Stuart, Brian D Williamson, Richard Wyss, Iván Díaz, Debashis Ghosh, Emre Kıcıman, Demissie Alemayehu, Katherine L Hoffman, Carla Y Vossen, Raymond A Huml, Henrik Ravn, Kajsa Kvist, Richard Pratley, Mei-Chiung Shih, Gene Pennello, David Martin, Salina P Waddy, Charles E Barr, Mouna Akacha, John B Buse, Mark van der Laan, Maya Petersen. A Causal Roadmap for Generating High-Quality Real-World Evidence. arXiv, 2023.
@misc{dang2023causalroadmap,
title = {A Causal Roadmap for Generating High-Quality Real-World Evidence},
author = {Lauren E Dang and Susan Gruber and Hana Lee and Issa Dahabreh and Elizabeth A Stuart and Brian D Williamson and Richard Wyss and Iván Díaz and Debashis Ghosh and Emre Kıcıman and Demissie Alemayehu and Katherine L Hoffman and Carla Y Vossen and Raymond A Huml and Henrik Ravn and Kajsa Kvist and Richard Pratley and Mei-Chiung Shih and Gene Pennello and David Martin and Salina P Waddy and Charles E Barr and Mouna Akacha and John B Buse and Mark van der Laan and Maya Petersen},
url = {https://arxiv.org/abs/2305.06850},
doi = {https://doi.org/10.48550/arXiv.2305.06850},
year = {2023},
date = {2023-05-11},
urldate = {2023-05-11},
abstract = {Increasing emphasis on the use of real-world evidence (RWE) to support clinical policy and regulatory decision-making has led to a proliferation of guidance, advice, and frameworks from regulatory agencies, academia, professional societies, and industry. A broad spectrum of studies use real-world data (RWD) to produce RWE, ranging from randomized controlled trials with outcomes assessed using RWD to fully observational studies. Yet many RWE study proposals lack sufficient detail to evaluate adequacy, and many analyses of RWD suffer from implausible assumptions, other methodological flaws, or inappropriate interpretations. The Causal Roadmap is an explicit, itemized, iterative process that guides investigators to pre-specify analytic study designs; it addresses a wide range of guidance within a single framework. By requiring transparent evaluation of causal assumptions and facilitating objective comparisons of design and analysis choices based on pre-specified criteria, the Roadmap can help investigators to evaluate the quality of evidence that a given study is likely to produce, specify a study to generate high-quality RWE, and communicate effectively with regulatory agencies and other stakeholders. This paper aims to disseminate and extend the Causal Roadmap framework for use by clinical and translational researchers, with companion papers demonstrating application of the Causal Roadmap for specific use cases.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Martin Josifoski, Maxime Peyrard, Frano Rajič, Jiheng Wei, Debjit Paul, Valentin Hartmann, Barun Patra, Vishrav Chaudhary, Emre Kiciman, Boi Faltings, Robert West. Language Model Decoding as Likelihood-Utility Alignment. In: Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2023), 2023.
@inproceedings{josifoski2023lmdecoding,
title = {Language Model Decoding as Likelihood-Utility Alignment},
author = {Martin Josifoski and Maxime Peyrard and Frano Rajič and Jiheng Wei and Debjit Paul and Valentin Hartmann and Barun Patra and Vishrav Chaudhary and Emre Kiciman and Boi Faltings and Robert West},
url = {https://arxiv.org/abs/2210.07228},
year = {2023},
date = {2023-05-02},
urldate = {2023-05-02},
booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (EACL-2023)},
abstract = {A critical component of a successful language generation pipeline is the decoding algorithm. However, the general principles that should guide the choice of decoding algorithm remain unclear. Previous works only compare decoding algorithms in narrow scenarios and their findings do not generalize across tasks. To better structure the discussion, we introduce a taxonomy that groups decoding strategies based on their implicit assumptions about how well the model's likelihood is aligned with the task-specific notion of utility. We argue that this taxonomy allows a broader view of the decoding problem and can lead to generalizable statements because it is grounded on the interplay between the decoding algorithms and the likelihood-utility misalignment. Specifically, by analyzing the correlation between the likelihood and the utility of predictions across a diverse set of tasks, we provide the first empirical evidence supporting the proposed taxonomy, and a set of principles to structure reasoning when choosing a decoding algorithm. Crucially, our analysis is the first one to relate likelihood-based decoding strategies with strategies that rely on external information such as value-guided methods and prompting, and covers the most diverse set of tasks up-to-date.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Jivat Neet Kaur, Emre Kıcıman, Amit Sharma. Modeling the Data-Generating Process is Necessary for Out-of-Distribution Generalization. In Proceedings of ICLR 2023 (Notable Top 25%), 2023.
@proceedings{Kaur2023OOD,
title = {Modeling the Data-Generating Process is Necessary for Out-of-Distribution Generalization},
author = {Jivat Neet Kaur and Emre Kıcıman and Amit Sharma},
url = {https://arxiv.org/abs/2206.07837},
year = {2023},
date = {2023-05-01},
urldate = {2023-05-01},
abstract = {Recent empirical studies on domain generalization (DG) have shown that DG algorithms that perform well on some distribution shifts fail on others, and no state-of-the-art DG algorithm performs consistently well on all shifts. Moreover, real-world data often has multiple distribution shifts over different attributes; hence we introduce multi-attribute distribution shift datasets and find that the accuracy of existing DG algorithms falls even further. To explain these results, we provide a formal characterization of generalization under multi-attribute shifts using a canonical causal graph. Based on the relationship between spurious attributes and the classification label, we obtain realizations of the canonical causal graph that characterize common distribution shifts and show that each shift entails different independence constraints over observed variables. As a result, we prove that any algorithm based on a single, fixed constraint cannot work well across all shifts, providing theoretical evidence for mixed empirical results on DG algorithms. Based on this insight, we develop Causally Adaptive Constraint Minimization (CACM), an algorithm that uses knowledge about the data-generating process to adaptively identify and apply the correct independence constraints for regularization. Results on fully synthetic, MNIST, small NORB, and Waterbirds datasets, covering binary and multi-valued attributes and labels, show that adaptive dataset-dependent constraints lead to the highest accuracy on unseen domains whereas incorrect constraints fail to do so. Our results demonstrate the importance of modeling the causal relationships inherent in the data-generating process.},
howpublished = {In Proceedings of ICLR 2023 (Notable Top 25%)},
keywords = {},
pubstate = {published},
tppubtype = {proceedings}
}
Emre Kıcıman, Robert Osazuwa Ness, Amit Sharma, Chenhao Tan. Causal Reasoning and Large Language Models: Opening a New Frontier for Causality. Working paper, arXiv, 2023.
@workingpaper{causalllm2023,
title = {Causal Reasoning and Large Language Models: Opening a New Frontier for Causality},
author = {Emre Kıcıman and Robert Osazuwa Ness and Amit Sharma and Chenhao Tan},
url = {https://arxiv.org/abs/2305.00050},
doi = {https://doi.org/10.48550/arXiv.2305.00050},
year = {2023},
date = {2023-04-28},
urldate = {2023-04-28},
abstract = {The causal capabilities of large language models (LLMs) are a matter of significant debate, with critical implications for the use of LLMs in societally impactful domains such as medicine, science, law, and policy. We further our understanding of LLMs and their causal implications, considering the distinctions between different types of causal reasoning tasks, as well as the entangled threats of construct and measurement validity. LLM-based methods establish new state-of-the-art accuracies on multiple causal benchmarks. Algorithms based on GPT-3.5 and 4 outperform existing algorithms on a pairwise causal discovery task (97%, 13 points gain), counterfactual reasoning task (92%, 20 points gain), and actual causality (86% accuracy in determining necessary and sufficient causes in vignettes). At the same time, LLMs exhibit unpredictable failure modes and we provide some techniques to interpret their robustness.
Crucially, LLMs perform these causal tasks while relying on sources of knowledge and methods distinct from and complementary to non-LLM based approaches. Specifically, LLMs bring capabilities so far understood to be restricted to humans, such as using collected knowledge to generate causal graphs or identifying background causal context from natural language. We envision LLMs to be used alongside existing causal methods, as a proxy for human domain knowledge and to reduce human effort in setting up a causal analysis, one of the biggest impediments to the widespread adoption of causal methods. We also see existing causal methods as promising tools for LLMs to formalize, validate, and communicate their reasoning especially in high-stakes scenarios.
In capturing common sense and domain knowledge about causal mechanisms and supporting translation between natural language and formal methods, LLMs open new frontiers for advancing the research, practice, and adoption of causality.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
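The pairwise causal discovery task evaluated in this paper can be probed with a very small amount of code. The sketch below is not the paper's implementation; `query_llm` is a placeholder for whatever chat-completion client is available, and the prompt wording is only illustrative:

```python
# Minimal sketch of an LLM probe for pairwise causal direction.
# `query_llm` is a placeholder callable (prompt -> reply text); swap in
# any real client. This is illustrative, not the paper's code.
from typing import Callable

PROMPT = (
    "Which cause-and-effect relationship is more plausible?\n"
    "(A) {x} causes {y}\n"
    "(B) {y} causes {x}\n"
    "Answer with a single letter, A or B."
)

def pairwise_direction(x: str, y: str, query_llm: Callable[[str], str]) -> str:
    """Return the causal direction suggested by the model's answer."""
    reply = query_llm(PROMPT.format(x=x, y=y)).strip().upper()
    return f"{x} -> {y}" if reply.startswith("A") else f"{y} -> {x}"

if __name__ == "__main__":
    stub = lambda prompt: "A"  # stand-in model that always answers A
    print(pairwise_direction("altitude", "air pressure", stub))
```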
Arash Nasr-Esfahany, Emre Kıcıman. Counterfactual (Non-)identifiability of Learned Structural Causal Models. Working paper, 2023.
@workingpaper{nasresfahanay2023counterfactual,
title = {Counterfactual (Non-)identifiability of Learned Structural Causal Models},
author = {Arash Nasr-Esfahany and Emre Kıcıman},
url = {https://arxiv.org/abs/2301.09031},
doi = {https://doi.org/10.48550/arXiv.2301.09031},
year = {2023},
date = {2023-01-22},
abstract = {Recent advances in probabilistic generative modeling have motivated learning Structural Causal Models (SCM) from observational datasets using deep conditional generative models, also known as Deep Structural Causal Models (DSCM). If successful, DSCMs can be utilized for causal estimation tasks, e.g., for answering counterfactual queries. In this work, we warn practitioners about non-identifiability of counterfactual inference from observational data, even in the absence of unobserved confounding and assuming known causal structure. We prove counterfactual identifiability of monotonic generation mechanisms with single dimensional exogenous variables. For general generation mechanisms with multi-dimensional exogenous variables, we provide an impossibility result for counterfactual identifiability, motivating the need for parametric assumptions. As a practical approach, we propose a method for estimating worst-case errors of learned DSCMs' counterfactual predictions. The size of this error can be an essential metric for deciding whether or not DSCMs are a viable approach for counterfactual inference in a specific problem setting. In evaluation, our method confirms negligible counterfactual errors for an identifiable SCM from prior work, and also provides informative error bounds on counterfactual errors for a non-identifiable synthetic SCM.},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Koustuv Saha, Pranshu Gupta, Gloria Mark, Emre Kıcıman, Munmun De Choudhury. Observer Effect in Social Media Use. Working paper, 2023.
@workingpaper{saha2023observereffect,
title = {Observer Effect in Social Media Use},
author = {Koustuv Saha and Pranshu Gupta and Gloria Mark and Emre Kıcıman and Munmun De Choudhury},
url = {https://www.researchsquare.com/article/rs-2492994/v1},
doi = {https://doi.org/10.21203/rs.3.rs-2492994/v1},
year = {2023},
date = {2023-01-19},
abstract = {Research has revealed the potential of social media as a source of large-scale, verbal, and naturalistic data for human behavior both in real-time and longitudinally. However, the in-practice utility of social media to assess and support wellbeing will only be realized when we account for extraneous factors. A factor that might confound our ability to make inferences is the phenomenon of the ``observer effect''---that individuals may deviate from their otherwise typical social media use because of the awareness of being monitored. This paper conducts a causal study to measure the observer effect in longitudinal social media use. We operationalized the observer effect in two dimensions of social media (Facebook) use---behavioral and linguistic changes. Participants consented to Facebook data collection over an average retrospective period of 82 months and an average prospective period of 5 months around the enrollment date to our study. We measured how they deviated from their expected social media use after enrollment. We obtained expected use by extrapolating from historical use using time-series (ARIMA) forecasting. We find that the deviation in social media use varies across individuals based on their psychological traits. Individuals with high cognitive ability and low neuroticism immediately decreased posting after enrollment, and those with high openness significantly increased posting. Linguistically, most individuals decreased the use of first-person pronouns, reflecting lowered sharing of intimate and self-attentional content. While some increased posting about public-facing events, others increased posting about family and social gatherings. We validate the observed changes with respect to psychological traits drawing from psychology and behavioral science theories, such as self-monitoring, public self-consciousness, and self-presentation. The findings provide recommendations to correct observer effects in social media data-driven assessments of human behavior.},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
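The "expected vs. observed" comparison described in the abstract (forecasting post-enrollment behavior from historical use) can be sketched with an off-the-shelf ARIMA model. This is an illustration on synthetic counts, not the study's pipeline, and the ARIMA order is an arbitrary choice:

```python
# Illustrative observer-effect estimate: forecast post-enrollment posting
# volume from historical monthly counts and measure the deviation.
# Synthetic data and an arbitrary ARIMA order; not the study's code.
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

rng = np.random.default_rng(0)
history = pd.Series(rng.poisson(lam=20, size=82))   # ~82 months before enrollment
observed = pd.Series(rng.poisson(lam=16, size=5))   # 5 months after enrollment

fit = ARIMA(history, order=(1, 0, 1)).fit()
expected = fit.forecast(steps=len(observed))

deviation = observed.to_numpy() - expected.to_numpy()
print("expected monthly posts:", np.round(expected.to_numpy(), 1))
print("observed monthly posts:", observed.to_numpy())
print("mean deviation (observer-effect estimate):", round(float(deviation.mean()), 2))
```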
2022
Maxim Peyrard, Sarvjeet Singh Ghotra, Martin Josifoski, Vidhan Agarwal, Barun Patra, Dean Carignan, Emre Kıcıman, Saurabh Tiwary, Robert West. Invariant Language Modeling. In: Proceedings of Empirical Methods in Natural Language Processing (EMNLP-2022), 2022.
@inproceedings{Peyrard2022InvariantLanguageModeling,
title = {Invariant Language Modeling},
author = {Maxim Peyrard and Sarvjeet Singh Ghotra and Martin Josifoski and Vidhan Agarwal and Barun Patra and Dean Carignan and Emre Kıcıman and Saurabh Tiwary and Robert West},
url = {https://arxiv.org/abs/2110.08413},
year = {2022},
date = {2022-12-07},
urldate = {2022-12-07},
booktitle = {Proceedings of Empirical Methods in Natural Language Processing (EMNLP-2022)},
abstract = {Large pretrained language models are critical components of modern NLP pipelines. Yet, they suffer from spurious correlations, poor out-of-domain generalization, and biases. Inspired by recent progress in causal machine learning, in particular the invariant risk minimization (IRM) paradigm, we propose invariant language modeling, a framework for learning invariant representations that generalize better across multiple environments. In particular, we adapt a game-theoretic formulation of IRM (IRM-games) to language models, where the invariance emerges from a specific training schedule in which all the environments compete to optimize their own environment-specific loss by updating subsets of the model in a round-robin fashion. We focus on controlled experiments to precisely demonstrate the ability of our method to (i) remove structured noise, (ii) ignore specific spurious correlations without affecting global performance, and (iii) achieve better out-of-domain generalization. These benefits come with a negligible computational overhead compared to standard training, do not require changing the local loss, and can be applied to any language model. We believe this framework is promising to help mitigate spurious correlations and biases in language models.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Somya Sharma, Swati Sharma, Andy Neal, Sara Malvar, Eduardo Rodrigues, John Crawford, Emre Kiciman, Ranveer Chandra. Causal Modeling of Soil Processes for Improved Generalization. NeurIPS 2022 Workshop: Tackling Climate Change with Machine Learning, 2022.
@workshop{sharma2022causalsoil,
title = {Causal Modeling of Soil Processes for Improved Generalization},
author = {Somya Sharma and Swati Sharma and Andy Neal and Sara Malvar and Eduardo Rodrigues and John Crawford and Emre Kiciman and Ranveer Chandra},
url = {https://www.climatechange.ai/papers/neurips2022/53
https://arxiv.org/abs/2211.05675
https://kiciman.org/wp-content/uploads/2023/01/2211.05675.pdf},
year = {2022},
date = {2022-11-10},
urldate = {2022-11-10},
booktitle = {NeurIPS 2022 Workshop: Tackling Climate Change with Machine Learning},
abstract = {Measuring and monitoring soil organic carbon is critical for agricultural productivity and for addressing critical environmental problems. Soil organic carbon not only enriches nutrition in soil, but also has a gamut of co-benefits such as improving water storage and limiting physical erosion. Despite a litany of work in soil organic carbon estimation, current approaches do not generalize well across soil conditions and management practices. We empirically show that explicit modeling of cause-and-effect relationships among the soil processes improves the out-of-distribution generalizability of prediction models. We provide a comparative analysis of soil organic carbon estimation models where the skeleton is estimated using causal discovery methods. Our framework provides an average improvement of 81% in test mean squared error and 52% in test mean absolute error.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
Parikshit Bansal, Yashoteja Prabhu, Emre Kıcıman, Amit Sharma. Using Interventions to Improve Out-of-Distribution Generalization of Text-Matching Recommendation Systems. NeurIPS 2022 Workshop on Distribution Shifts (DistShift), 2022.
@workshop{Bansal2022Interventions,
title = {Using Interventions to Improve Out-of-Distribution Generalization of Text-Matching Recommendation Systems},
author = {Parikshit Bansal and Yashoteja Prabhu and Emre Kıcıman and Amit Sharma},
url = {https://arxiv.org/abs/2210.10636},
doi = {https://doi.org/10.48550/arXiv.2210.10636},
year = {2022},
date = {2022-10-07},
journal = {NeurIPS 2022 Workshop on Distribution Shifts (DistShift)},
abstract = {Given a user's input text, text-matching recommender systems output relevant items by comparing the input text to available items' description, such as product-to-product recommendation on e-commerce platforms. As users' interests and item inventory are expected to change, it is important for a text-matching system to generalize to data shifts, a task known as out-of-distribution (OOD) generalization. However, we find that the popular approach of fine-tuning a large, base language model on paired item relevance data (e.g., user clicks) can be counter-productive for OOD generalization. For a product recommendation task, fine-tuning obtains worse accuracy than the base model when recommending items in a new category or for a future time period. To explain this generalization failure, we consider an intervention-based importance metric, which shows that a fine-tuned model captures spurious correlations and fails to learn the causal features that determine the relevance between any two text inputs. Moreover, standard methods for causal regularization do not apply in this setting, because unlike in images, there exist no universally spurious features in a text-matching task (the same token may be spurious or causal depending on the text it is being matched to). For OOD generalization on text inputs, therefore, we highlight a different goal: avoiding high importance scores for certain features. We do so using an intervention-based regularizer that constrains the causal effect of any token on the model's relevance score to be similar to the base model. Results on Amazon product and 3 question recommendation datasets show that our proposed regularizer improves generalization for both in-distribution and OOD evaluation, especially in difficult scenarios when the base model is not accurate.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
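The intervention-based importance metric discussed in the abstract boils down to ablating a token and measuring how much the relevance score moves. A minimal sketch of that idea follows; `relevance_score` is a placeholder for whichever matching model (base or fine-tuned) is being inspected, and this is not the paper's implementation:

```python
# Intervention-style token importance for a text-matching model:
# delete one query token at a time and record the change in the
# relevance score. `relevance_score` is a placeholder callable.
from typing import Callable, Dict

def token_importances(query: str, item: str,
                      relevance_score: Callable[[str, str], float]) -> Dict[str, float]:
    """Map each query token to the score drop caused by removing it."""
    base = relevance_score(query, item)
    tokens = query.split()
    importances = {}
    for i, tok in enumerate(tokens):
        ablated = " ".join(tokens[:i] + tokens[i + 1:])
        importances[tok] = base - relevance_score(ablated, item)
    return importances

if __name__ == "__main__":
    # Toy scorer: word-overlap count between query and item description.
    score = lambda q, d: float(len(set(q.split()) & set(d.split())))
    print(token_importances("wireless noise cancelling headphones",
                            "bluetooth headphones with noise cancelling", score))
```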
Jivat Neet Kaur, Emre Kıcıman, Amit Sharma. Modeling the Data-Generating Process is Necessary for Out-of-Distribution Generalization. Workshop on Spurious Correlations, Invariance, and Stability, 2022.
@workshop{CACM2022,
title = {Modeling the Data-Generating Process is Necessary for Out-of-Distribution Generalization},
author = {Jivat Neet Kaur and Emre Kıcıman and Amit Sharma},
url = {https://kiciman.org/wp-content/uploads/2022/07/2206.07837.pdf},
year = {2022},
date = {2022-07-19},
abstract = {Real-world data collected from multiple domains can have multiple, distinct distribution shifts over multiple attributes. However, state-of-the art advances in domain generalization (DG) algorithms focus only on specific shifts over a single attribute. We introduce datasets with multi-attribute distribution shifts and find that existing DG algorithms fail to generalize. To explain this, we use causal graphs to characterize the different types of shifts based on the relationship between spurious attributes and the classification label. Each multi-attribute causal graph entails different constraints over observed variables, and therefore any algorithm based on a single, fixed independence constraint cannot work well across all shifts. We present Causally Adaptive Constraint Minimization (CACM), a new algorithm for identifying the correct independence constraints for regularization. Results on fully synthetic, MNIST and small NORB datasets, covering binary and multi-valued attributes and labels, confirm our theoretical claim: correct independence constraints lead to the highest accuracy on unseen domains whereas incorrect constraints fail to do so. Our results demonstrate the importance of modeling the causal relationships inherent in the data-generating process: in many cases, it is impossible to know the correct regularization constraints without this information.},
howpublished = {Workshop on Spurious Correlations, Invariance, and Stability},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
Razieh Nabi, Joel Pfeiffer, Denis Charles, Emre Kıcıman. Causal Inference in the Presence of Interference in Sponsored Search Advertising. In: Frontiers in Big Data, vol. 5, 2022, ISSN: 2624-909X.
@article{nabi2022,
title = {Causal Inference in the Presence of Interference in Sponsored Search Advertising},
author = {Razieh Nabi and Joel Pfeiffer and Denis Charles and Emre Kıcıman},
editor = {Elena Zheleva},
url = {https://kiciman.org/wp-content/uploads/2022/07/fdata-05-888592.pdf},
doi = {10.3389/fdata.2022.888592},
issn = {2624-909X},
year = {2022},
date = {2022-06-21},
urldate = {2022-06-21},
journal = {Frontiers in Big Data},
volume = {5},
abstract = {In classical causal inference, inferring cause-effect relations from data relies on the assumption that units are independent and identically distributed. This assumption is violated in settings where units are related through a network of dependencies. An example of such a setting is ad placement in sponsored search advertising, where the likelihood of a user clicking on a particular ad is potentially influenced by where it is placed and where other ads are placed on the search result page. In such scenarios, confounding arises due to not only the individual ad-level covariates but also the placements and covariates of other ads in the system. In this paper, we leverage the language of causal inference in the presence of interference to model interactions among the ads. Quantification of such interactions allows us to better understand the click behavior of users, which in turn impacts the revenue of the host search engine and enhances user satisfaction. We illustrate the utility of our formalization through experiments carried out on the ad placement system of the Bing search engine.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kristina Gligorić, Arnaud Chiolero, Emre Kıcıman, Ryen W. White, Robert West. Population-scale dietary interests during the COVID-19 pandemic. In: Nature Communications, vol. 13, no. 1073, 2022.
@article{Gligoric2022,
title = {Population-scale dietary interests during the COVID-19 pandemic},
author = {Kristina Gligorić and Arnaud Chiolero and Emre Kıcıman and Ryen W. White and Robert West},
url = {https://rdcu.be/cHUrX},
doi = {https://doi.org/10.1038/s41467-022-28498-z},
year = {2022},
date = {2022-02-28},
journal = {Nature Communications},
volume = {13},
number = {1073},
abstract = {The SARS-CoV-2 virus has altered people’s lives around the world. Here we document population-wide shifts in dietary interests in 18 countries in 2020, as revealed through time series of Google search volumes. We find that during the first wave of the COVID-19 pandemic there was an overall surge in food interest, larger and longer-lasting than the surge during typical end-of-year holidays in Western countries. The shock of decreased mobility manifested as a drastic increase in interest in consuming food at home and a corresponding decrease in consuming food outside of home. The largest (up to threefold) increases occurred for calorie-dense carbohydrate-based foods such as pastries, bakery products, bread, and pies. The observed shifts in dietary interests have the potential to globally affect food consumption and health outcomes. These findings can inform governmental and organizational decisions regarding measures to mitigate the effects of the COVID-19 pandemic on diet and nutrition.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
The SARS-CoV-2 virus has altered people’s lives around the world. Here we document population-wide shifts in dietary interests in 18 countries in 2020, as revealed through time series of Google search volumes. We find that during the first wave of the COVID-19 pandemic there was an overall surge in food interest, larger and longer-lasting than the surge during typical end-of-year holidays in Western countries. The shock of decreased mobility manifested as a drastic increase in interest in consuming food at home and a corresponding decrease in consuming food outside of home. The largest (up to threefold) increases occurred for calorie-dense carbohydrate-based foods such as pastries, bakery products, bread, and pies. The observed shifts in dietary interests have the potential to globally affect food consumption and health outcomes. These findings can inform governmental and organizational decisions regarding measures to mitigate the effects of the COVID-19 pandemic on diet and nutrition. |
Andi Peng, Besmira Nushi, Emre Kıcıman, Kori Inkpen, Ece Kamar Investigations of Performance and Bias in Human-AI Teamwork in Hiring Conference AAAI 2022, AAAI, 2022. @conference{Peng2022,
title = {Investigations of Performance and Bias in Human-AI Teamwork in Hiring},
author = {Andi Peng and Besmira Nushi and Emre Kıcıman and Kori Inkpen and Ece Kamar},
url = {https://kiciman.org/wp-content/uploads/2022/02/2202.11812.pdf},
year = {2022},
date = {2022-02-08},
booktitle = {AAAI 2022},
publisher = {AAAI},
abstract = {In AI-assisted decision-making, effective hybrid (human-AI) teamwork is not solely dependent on AI performance alone, but also on its impact on human decision-making. While prior work studies the effects of model accuracy on humans, we endeavour here to investigate the complex dynamics of how both a model’s predictive performance and bias may transfer to humans in a recommendation-aided decision task. We consider the domain of ML-assisted hiring, where humans—operating in a constrained selection setting—can choose whether they wish to utilize a trained model’s inferences to help select candidates from written biographies. We conduct a large-scale user study leveraging a re-created dataset of real bios from prior work, where humans predict the ground truth occupation of given candidates with and without the help of three different NLP classifiers (random, bag-of-words, and deep neural network). Our results demonstrate that while high-performance models significantly improve human performance in a hybrid setting, some models mitigate hybrid bias while others accentuate it. We examine these findings through the lens of decision conformity and observe that our model architecture choices have an impact on human-AI conformity and bias, motivating the explicit need to assess these complex dynamics prior to deployment.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
In AI-assisted decision-making, effective hybrid (human-AI) teamwork is not solely dependent on AI performance alone, but also on its impact on human decision-making. While prior work studies the effects of model accuracy on humans, we endeavour here to investigate the complex dynamics of how both a model’s predictive performance and bias may transfer to humans in a recommendation-aided decision task. We consider the domain of ML-assisted hiring, where humans—operating in a constrained selection setting—can choose whether they wish to utilize a trained model’s inferences to help select candidates from written biographies. We conduct a large-scale user study leveraging a re-created dataset of real bios from prior work, where humans predict the ground truth occupation of given candidates with and without the help of three different NLP classifiers (random, bag-of-words, and deep neural network). Our results demonstrate that while high-performance models significantly improve human performance in a hybrid setting, some models mitigate hybrid bias while others accentuate it. We examine these findings through the lens of decision conformity and observe that our model architecture choices have an impact on human-AI conformity and bias, motivating the explicit need to assess these complex dynamics prior to deployment. |
Tomas Geffner, Javier Antoran, Adam Foster, Wenbo Gong, Chao Ma, Emre Kiciman, Amit Sharma, Angus Lamb, Martin Kukla, Nick Pawlowski, Miltiadis Allamanis, Cheng Zhang Deep End-to-end Causal Inference Working paper arXiv preprint arXiv:2202.02195, 2022. @workingpaper{deci2022,
title = {Deep End-to-end Causal Inference},
author = {Tomas Geffner and Javier Antoran and Adam Foster and Wenbo Gong and Chao Ma and Emre Kiciman and Amit Sharma and Angus Lamb and Martin Kukla and Nick Pawlowski and Miltiadis Allamanis and Cheng Zhang},
url = {https://arxiv.org/abs/2202.02195},
year = {2022},
date = {2022-02-04},
abstract = {Causal inference is essential for data-driven decision making across domains such as business engagement, medical treatment or policy making. However, research on causal discovery and inference has evolved separately, and the combination of the two domains is not trivial. In this work, we develop Deep End-to-end Causal Inference (DECI), a single flow-based method that takes in observational data and can perform both causal discovery and inference, including conditional average treatment effect (CATE) estimation. We provide a theoretical guarantee that DECI can recover the ground truth causal graph under mild assumptions. In addition, our method can handle heterogeneous, real-world, mixed-type data with missing values, allowing for both continuous and discrete treatment decisions. Moreover, the design principle of our method can generalize beyond DECI, providing a general End-to-end Causal Inference (ECI) recipe, which enables different ECI frameworks to be built using existing methods. Our results show the superior performance of DECI when compared to relevant baselines for both causal discovery and (C)ATE estimation in over a thousand experiments on both synthetic datasets and other causal machine learning benchmark datasets.},
howpublished = {arXiv preprint arXiv:2202.02195},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Causal inference is essential for data-driven decision making across domains such as business engagement, medical treatment or policy making. However, research on causal discovery and inference has evolved separately, and the combination of the two domains is not trivial. In this work, we develop Deep End-to-end Causal Inference (DECI), a single flow-based method that takes in observational data and can perform both causal discovery and inference, including conditional average treatment effect (CATE) estimation. We provide a theoretical guarantee that DECI can recover the ground truth causal graph under mild assumptions. In addition, our method can handle heterogeneous, real-world, mixed-type data with missing values, allowing for both continuous and discrete treatment decisions. Moreover, the design principle of our method can generalize beyond DECI, providing a general End-to-end Causal Inference (ECI) recipe, which enables different ECI frameworks to be built using existing methods. Our results show the superior performance of DECI when compared to relevant baselines for both causal discovery and (C)ATE estimation in over a thousand experiments on both synthetic datasets and other causal machine learning benchmark datasets. |
2021
|
Kristina Gligorić, Ryen W White, Emre Kıcıman, Eric Horvitz, Arnaud Chiolero, Robert West Formation of Social Ties Predicts Food Choice: A Campus-wide Longitudinal Study Conference ACM Conference on Computer-Supported Cooperative Work and Social Computing CSCW, ACM 2021, (Honorable Mention). @conference{gligoric2021,
title = {Formation of Social Ties Predicts Food Choice: A Campus-wide Longitudinal Study},
author = {Kristina Gligorić and Ryen W White and Emre Kıcıman and Eric Horvitz and Arnaud Chiolero and Robert West},
url = {https://kiciman.org/wp-content/uploads/2021/02/Gligoric-White-Kiciman-Horvitz-Chiolero-West_CSCW-21.pdf},
year = {2021},
date = {2021-10-23},
booktitle = {ACM Conference on Computer-Supported Cooperative Work and Social Computing CSCW},
organization = {ACM},
abstract = {Nutrition is a key determinant of long-term health, and social influence has long been theorized to be a key determinant of nutrition. It has been difficult to quantify the postulated role of social influence on nutrition using traditional methods such as surveys, due to the typically small scale and short duration of studies. To overcome these limitations, we leverage a novel source of data: logs of 38 million food purchases made over an 8-year period on the École Polytechnique Fédérale de Lausanne (EPFL) university campus, linked to anonymized individuals via the smartcards used to make on-campus purchases. In a longitudinal observational study, we ask: How is a person’s food choice affected by eating with someone else whose own food choice is healthy vs. unhealthy? To estimate causal effects from the passively observed log data, we control confounds in a matched quasi experimental design: we identify focal users who at first do not have any regular eating partners but then start eating with a fixed partner regularly, and we match focal users into comparison pairs such that paired users are nearly identical with respect to covariates measured before acquiring the partner, where the two focal users’ new eating partners diverge in the healthiness of their respective food choice. A difference-in-differences analysis of the paired data yields clear evidence of social influence: focal users acquiring a healthy-eating partner change their habits significantly more toward healthy foods than focal users acquiring an unhealthy-eating partner. We further identify foods whose purchase frequency is impacted significantly by the eating partner’s healthiness of food choice. Beyond the main results, the work demonstrates the utility of passively sensed food purchase logs for deriving insights, with the potential of informing the design of public health interventions and food offerings, especially on university campuses.},
note = {Honorable Mention},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Nutrition is a key determinant of long-term health, and social influence has long been theorized to be a key determinant of nutrition. It has been difficult to quantify the postulated role of social influence on nutrition using traditional methods such as surveys, due to the typically small scale and short duration of studies. To overcome these limitations, we leverage a novel source of data: logs of 38 million food purchases made over an 8-year period on the École Polytechnique Fédérale de Lausanne (EPFL) university campus, linked to anonymized individuals via the smartcards used to make on-campus purchases. In a longitudinal observational study, we ask: How is a person’s food choice affected by eating with someone else whose own food choice is healthy vs. unhealthy? To estimate causal effects from the passively observed log data, we control confounds in a matched quasi experimental design: we identify focal users who at first do not have any regular eating partners but then start eating with a fixed partner regularly, and we match focal users into comparison pairs such that paired users are nearly identical with respect to covariates measured before acquiring the partner, where the two focal users’ new eating partners diverge in the healthiness of their respective food choice. A difference-in-differences analysis of the paired data yields clear evidence of social influence: focal users acquiring a healthy-eating partner change their habits significantly more toward healthy foods than focal users acquiring an unhealthy-eating partner. We further identify foods whose purchase frequency is impacted significantly by the eating partner’s healthiness of food choice. Beyond the main results, the work demonstrates the utility of passively sensed food purchase logs for deriving insights, with the potential of informing the design of public health interventions and food offerings, especially on university campuses. |
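Below is a minimal Python sketch of the matched difference-in-differences comparison described in this abstract; the data frame, column names, and toy values are hypothetical placeholders rather than the study's purchase logs.

# Hypothetical sketch of a matched difference-in-differences comparison; the columns
# and toy data below are illustrative, not the paper's actual dataset.
import pandas as pd

# One row per focal user: healthiness of purchases before/after acquiring a partner,
# and whether the new eating partner is a healthy or unhealthy eater.
df = pd.DataFrame({
    "user_id":          [1, 2, 3, 4],
    "partner_healthy":  [True, True, False, False],
    "healthiness_pre":  [0.42, 0.55, 0.47, 0.51],
    "healthiness_post": [0.50, 0.61, 0.45, 0.49],
})

df["delta"] = df["healthiness_post"] - df["healthiness_pre"]

# Difference-in-differences: change among users who acquired a healthy-eating
# partner minus change among users who acquired an unhealthy-eating partner.
did = (df.loc[df["partner_healthy"], "delta"].mean()
       - df.loc[~df["partner_healthy"], "delta"].mean())
print(f"Difference-in-differences estimate: {did:+.3f}")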
Maxime Peyrard, Sarvjeet Singh Ghotra, Martin Josifoski, Vidhan Agarwal, Barun Patra, Dean Carignan, Emre Kiciman, Robert West Invariant Language Modeling Working paper arXiv preprint arXiv:2110.08413, 2021. @workingpaper{InvariantLanguageModeling,
title = {Invariant Language Modeling},
author = {Maxime Peyrard and Sarvjeet Singh Ghotra and Martin Josifoski and Vidhan Agarwal and Barun Patra and Dean Carignan and Emre Kiciman and Robert West},
url = {https://arxiv.org/abs/2110.08413},
year = {2021},
date = {2021-10-16},
urldate = {2021-10-16},
abstract = {Modern pretrained language models are critical components of NLP pipelines. Yet, they suffer from spurious correlations, poor out-of-domain generalization, and biases. Inspired by recent progress in causal machine learning, in particular the invariant risk minimization (IRM) paradigm, we propose invariant language modeling, a framework for learning invariant representations that generalize better across multiple environments. In particular, we adapt a game-theoretic implementation of IRM (IRM-games) to language models, where the invariance emerges from a specific training schedule in which all the environments compete to optimize their own environment-specific loss by updating subsets of the model in a round-robin fashion. In a series of controlled experiments, we demonstrate the ability of our method to (i) remove structured noise, (ii) ignore specific spurious correlations without affecting global performance, and (iii) achieve better out-of-domain generalization. These benefits come with a negligible computational overhead compared to standard training, do not require changing the local loss, and can be applied to any language model architecture. We believe this framework is promising to help mitigate spurious correlations and biases in language models.},
howpublished = {arXiv preprint arXiv:2110.08413},
keywords = {},
pubstate = {published},
tppubtype = {workingpaper}
}
Modern pretrained language models are critical components of NLP pipelines. Yet, they suffer from spurious correlations, poor out-of-domain generalization, and biases. Inspired by recent progress in causal machine learning, in particular the invariant risk minimization (IRM) paradigm, we propose invariant language modeling, a framework for learning invariant representations that generalize better across multiple environments. In particular, we adapt a game-theoretic implementation of IRM (IRM-games) to language models, where the invariance emerges from a specific training schedule in which all the environments compete to optimize their own environment-specific loss by updating subsets of the model in a round-robin fashion. In a series of controlled experiments, we demonstrate the ability of our method to (i) remove structured noise, (ii) ignore specific spurious correlations without affecting global performance, and (iii) achieve better out-of-domain generalization. These benefits come with a negligible computational overhead compared to standard training, do not require changing the local loss, and can be applied to any language model architecture. We believe this framework is promising to help mitigate spurious correlations and biases in language models. |
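Below is a rough, illustrative sketch of a round-robin IRM-games-style training schedule as summarized above, assuming a shared encoder with one prediction head per environment whose outputs are averaged; the model sizes, toy data, and hyperparameters are placeholders, not the paper's language-model setup.

# Simplified IRM-games-style round-robin schedule: each environment updates only its
# own head on its own loss, while the shared encoder is updated on every turn.
import torch
import torch.nn as nn

n_envs, d_in, d_hid, n_classes = 3, 16, 32, 4
encoder = nn.Sequential(nn.Linear(d_in, d_hid), nn.ReLU())
heads = nn.ModuleList([nn.Linear(d_hid, n_classes) for _ in range(n_envs)])
opt_enc = torch.optim.Adam(encoder.parameters(), lr=1e-3)
opt_heads = [torch.optim.Adam(h.parameters(), lr=1e-3) for h in heads]
loss_fn = nn.CrossEntropyLoss()

# Toy per-environment batches standing in for environment-specific corpora.
envs = [(torch.randn(64, d_in), torch.randint(0, n_classes, (64,))) for _ in range(n_envs)]

for step in range(100):
    e = step % n_envs                      # round-robin over environments
    x, y = envs[e]
    z = encoder(x)
    logits = torch.stack([h(z) for h in heads]).mean(dim=0)  # ensemble prediction
    loss = loss_fn(logits, y)
    opt_enc.zero_grad()
    opt_heads[e].zero_grad()
    loss.backward()
    opt_heads[e].step()                    # only environment e's head is updated
    opt_enc.step()                         # shared representation updated every turn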
Amit Sharma, Vasilis Syrgkanis, Cheng Zhang, Emre Kıcıman DoWhy: Addressing Challenges in Expressing and Validating Causal Assumptions Workshop ICML 2021 Workshop on The Neglected Assumptions in Causal Inference, 2021. @workshop{dowhyassumptions,
title = {DoWhy: Addressing Challenges in Expressing and Validating Causal Assumptions},
author = {Amit Sharma and Vasilis Syrgkanis and Cheng Zhang and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2021/07/dowhy_icml_causal_assumptions_workshop_2021.pdf
https://arxiv.org/abs/2108.13518},
year = {2021},
date = {2021-07-23},
booktitle = {ICML 2021 Workshop on The Neglected Assumptions in Causal Inference},
abstract = {Estimation of causal effects involves crucial assumptions about the data-generating process, such as directionality of effect, presence of instrumental variables or mediators, and whether all relevant confounders are observed. Violation of any of these assumptions leads to significant error in the effect estimate. However, unlike cross-validation for predictive models, there is no global validator method for a causal estimate. As a result, expressing different causal assumptions formally and validating them (to the extent possible) becomes critical for any analysis. We present DoWhy, a framework that allows explicit declaration of assumptions through a causal graph and provides multiple validation tests to check a subset of these assumptions. Our experience with DoWhy highlights a number of open questions for future research: developing new ways beyond causal graphs to express assumptions, the role of causal discovery in learning relevant parts of the graph, and developing validation tests that can better detect errors, both for average and conditional treatment effects. DoWhy is available at https://github.com/microsoft/dowhy.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
Estimation of causal effects involves crucial assumptions about the data-generating process, such as directionality of effect, presence of instrumental variables or mediators, and whether all relevant confounders are observed. Violation of any of these assumptions leads to significant error in the effect estimate. However, unlike cross-validation for predictive models, there is no global validator method for a causal estimate. As a result, expressing different causal assumptions formally and validating them (to the extent possible) becomes critical for any analysis. We present DoWhy, a framework that allows explicit declaration of assumptions through a causal graph and provides multiple validation tests to check a subset of these assumptions. Our experience with DoWhy highlights a number of open questions for future research: developing new ways beyond causal graphs to express assumptions, the role of causal discovery in learning relevant parts of the graph, and developing validation tests that can better detect errors, both for average and conditional treatment effects. DoWhy is available at https://github.com/microsoft/dowhy. |
Claudia Wagner, Markus Strohmaier, Alexandra Olteanu, Emre Kıcıman, Noshir Contractor, Tina Eliassi-Rad Measuring algorithmically infused societies Journal Article In: Nature, 2021. @article{WagnerMeasuringNature2021,
title = {Measuring algorithmically infused societies},
author = {Claudia Wagner and Markus Strohmaier and Alexandra Olteanu and Emre Kıcıman and Noshir Contractor and Tina Eliassi-Rad },
url = {https://www.nature.com/articles/s41586-021-03666-1.epdf?sharing_token=lQCG45r19TxE3tD8-h0MS9RgN0jAjWel9jnR3ZoTv0NhQG5ZZcbwM2Qx8s8zC_BcyhWa3uxAeNhO-R_s9UTTTZbyNA2eLRPbbcLuGURuUYpbOWDwoNmePiGDUOnlUg6ZpXHgXjw9ses_9B-_IOgUaMmZ-KKlFFz3p8NSiTLwXcg%3D},
doi = {https://doi.org/10.1038/s41586-021-03666-1},
year = {2021},
date = {2021-06-30},
journal = {Nature},
abstract = {It has been the historic responsibility of the social sciences to investigate human societies. Fulfilling this responsibility requires social theories, measurement models and social data. Most existing theories and measurement models in the social sciences were not developed with the deep societal reach of algorithms in mind. The emergence of ‘algorithmically infused societies’—societies whose very fabric is co-shaped by algorithmic and human behaviour—raises three key challenges: the insufficient quality of measurements, the complex consequences of (mis)measurements, and the limits of existing social theories. Here we argue that tackling these challenges requires new social theories that account for the impact of algorithmic systems on social realities. To develop such theories, we need new methodologies for integrating data and measurements into theory construction. Given the scale at which measurements can be applied, we believe measurement models should be trustworthy, auditable and just. To achieve this, the development of measurements should be transparent and participatory, and include mechanisms to ensure measurement quality and identify possible harms. We argue that computational social scientists should rethink what aspects of algorithmically infused societies should be measured, how they should be measured, and the consequences of doing so.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
It has been the historic responsibility of the social sciences to investigate human societies. Fulfilling this responsibility requires social theories, measurement models and social data. Most existing theories and measurement models in the social sciences were not developed with the deep societal reach of algorithms in mind. The emergence of ‘algorithmically infused societies’—societies whose very fabric is co-shaped by algorithmic and human behaviour—raises three key challenges: the insufficient quality of measurements, the complex consequences of (mis)measurements, and the limits of existing social theories. Here we argue that tackling these challenges requires new social theories that account for the impact of algorithmic systems on social realities. To develop such theories, we need new methodologies for integrating data and measurements into theory construction. Given the scale at which measurements can be applied, we believe measurement models should be trustworthy, auditable and just. To achieve this, the development of measurements should be transparent and participatory, and include mechanisms to ensure measurement quality and identify possible harms. We argue that computational social scientists should rethink what aspects of algorithmically infused societies should be measured, how they should be measured, and the consequences of doing so. |
Koustuv Saha, John Torous, Emre Kıcıman, Munmun De Choudhury Understanding Side Effects of Antidepressants: Large-scale Longitudinal Study on Social Media Data Journal Article In: JMIR Mental Health, 2021. @article{Saha2021,
title = {Understanding Side Effects of Antidepressants: Large-scale Longitudinal Study on Social Media Data},
author = {Koustuv Saha and John Torous and Emre Kıcıman and Munmun De Choudhury},
url = {https://mental.jmir.org/2021/3/e26589/},
doi = {10.2196/26589},
year = {2021},
date = {2021-03-19},
journal = {JMIR Mental Health},
abstract = {Background: Antidepressants are known to show heterogeneous effects across individuals and conditions, posing challenges to understanding their efficacy in mental health treatment.
Objective: We aim to understand the side effects of antidepressants from naturalistic expressions of individuals on social media.
Methods: On a large-scale Twitter dataset of individuals who self-reported using antidepressants, we conducted unsupervised language analysis to extract keywords that distinguish individuals who improved and who did not improve following the use of antidepressants.
Results: We examined five major side effects of antidepressants: sleep, weight, eating, pain, and sexual effects. Social media language revealed keywords related to these side effects.
Conclusions: This work enhances our understanding of the side effects of antidepressants by identifying distinct linguistic markers in the longitudinal social media data of individuals showing improved and worsened symptoms following the self-reported intake of antidepressants.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Background: Antidepressants are known to show heterogeneous effects across individuals and conditions, posing challenges to understanding their efficacy in mental health treatment.
Objective: We aim to understand the side effects of antidepressants from naturalistic expressions of individuals on social media.
Methods: On a large-scale Twitter dataset of individuals who self-reported using antidepressants, we conducted unsupervised language analysis to extract keywords that distinguish individuals who improved and who did not improve following the use of antidepressants.
Results: We examined five major side effects of antidepressants: sleep, weight, eating, pain, and sexual effects. Social media language revealed keywords related to these side effects.
Conclusions: This work enhances our understanding of the side effects of antidepressants by identifying distinct linguistic markers in the longitudinal social media data of individuals showing improved and worsened symptoms following the self-reported intake of antidepressants. |
Shuxi Zeng, Murat Ali Bayir, Joel Pfeiffer, Denis Charles, Emre Kiciman Causal Transfer Random Forest: Combining Logged Data and Randomized Experiments for Robust Prediction Conference Proceedings of the 14th ACM Intl Conf. on Web Search and Data Mining, ACM, 2021. @conference{Zeng2020CTRF,
title = {Causal Transfer Random Forest: Combining Logged Data and Randomized Experiments for Robust Prediction},
author = {Shuxi Zeng and Murat Ali Bayir and Joel Pfeiffer and Denis Charles and Emre Kiciman},
url = {https://kiciman.org/wp-content/uploads/2021/02/WSDM_CTRF.pdf},
year = {2021},
date = {2021-03-08},
booktitle = {Proceedings of the 14th ACM Intl Conf. on Web Search and Data Mining},
publisher = {ACM},
abstract = {It is often critical for prediction models to be robust to distributional shifts between training and testing data. From a causal perspective, the challenge is to distinguish the stable causal relationships from the unstable spurious correlations across shifts. We describe a causal transfer random forest (CTRF) that combines existing training data with a small amount of data from a randomized experiment to train a model which is robust to the feature shifts and therefore transfers to a new targeting distribution. Theoretically, we justify the robustness of the approach against feature shifts with the knowledge from causal learning. Empirically, we evaluate the CTRF using both synthetic data experiments and real-world experiments in the Bing Ads platform, including a click prediction task and in the context of an end-to-end counterfactual optimization system. The proposed CTRF produces robust predictions and outperforms most baseline methods compared in the presence of feature shifts.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
It is often critical for prediction models to be robust to distributional shifts between training and testing data. From a causal perspective, the challenge is to distinguish the stable causal relationships from the unstable spurious correlations across shifts. We describe a causal transfer random forest (CTRF) that combines existing training data with a small amount of data from a randomized experiment to train a model which is robust to the feature shifts and therefore transfers to a new targeting distribution. Theoretically, we justify the robustness of the approach against feature shifts with the knowledge from causal learning. Empirically, we evaluate the CTRF using both synthetic data experiments and real-world experiments in the Bing Ads platform, including a click prediction task and in the context of an end-to-end counterfactual optimization system. The proposed CTRF produces robust predictions and outperforms most baseline methods compared in the presence of feature shifts. |
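Below is a simplified sketch of the two-stage idea summarized above: learn the forest structure on randomized-experiment data, then re-estimate each leaf's prediction from pooled data. The synthetic arrays and the scikit-learn-based calibration are illustrative assumptions, not the paper's implementation.

# Stage 1: tree structure from randomized data only; Stage 2: leaf values recalibrated
# on the combined randomized + observational data. Data here are synthetic placeholders.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
X_rand, y_rand = rng.normal(size=(2_000, 10)), rng.integers(0, 2, 2_000)    # randomized
X_obs,  y_obs  = rng.normal(size=(20_000, 10)), rng.integers(0, 2, 20_000)  # observational

# Stage 1: learn the forest structure where the logging policy's spurious
# feature/label correlations have been broken by randomization.
forest = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_rand, y_rand)

# Stage 2: per-leaf positive rates re-estimated from the combined data.
X_all = np.vstack([X_rand, X_obs])
y_all = np.concatenate([y_rand, y_obs])
leaves_all = forest.apply(X_all)            # (n_samples, n_trees) leaf indices

def predict_calibrated(X_new):
    leaves_new = forest.apply(X_new)
    preds = np.zeros(len(X_new))
    for t in range(forest.n_estimators):
        # mean label of calibration examples falling in the same leaf of tree t
        leaf_means = {leaf: y_all[leaves_all[:, t] == leaf].mean()
                      for leaf in np.unique(leaves_all[:, t])}
        preds += np.array([leaf_means.get(l, y_all.mean()) for l in leaves_new[:, t]])
    return preds / forest.n_estimators

print(predict_calibrated(X_obs[:5]))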
Yanbo Xu, Divyat Mahajan, Liz Manrao, Amit Sharma, Emre Kıcıman Split-Treatment Analysis to Rank Heterogeneous Causal Effects for Prospective Interventions Conference Proceedings of the 14th ACM Intl. Conf. on Web Search and Data Mining, 2021. @conference{Xu2020SplitTreatment,
title = {Split-Treatment Analysis to Rank Heterogeneous Causal Effects for Prospective Interventions},
author = {Yanbo Xu and Divyat Mahajan and Liz Manrao and Amit Sharma and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2021/02/Split_Treatment_Analysis_WSDM.pdf},
year = {2021},
date = {2021-03-08},
booktitle = {Proceedings of the 14th ACM Intl. Conf. on Web Search and Data Mining},
abstract = {For many kinds of interventions, such as a new advertisement, marketing intervention, or feature recommendation, it is important to target a specific subset of people for maximizing its benefits at minimum cost or potential harm. However, a key challenge is that no data is available about the effect of such a prospective intervention since it has not been deployed yet. In this work, we propose a split-treatment analysis that ranks the individuals most likely to be positively affected by a prospective intervention using past observational data. Unlike standard causal inference methods, the split-treatment method does not need any observations of the target treatments themselves. Instead it relies on observations of a proxy treatment that is caused by the target treatment. Under reasonable assumptions, we show that the ranking of heterogeneous causal effects based on the proxy treatment is the same as the ranking based on the target treatment’s effect. In the absence of any interventional data for cross-validation, Split-Treatment uses sensitivity analyses for unobserved confounding to eliminate unreliable models. We apply Split-Treatment to simulated data and a large-scale, real-world targeting task and validate our discovered rankings via a randomized experiment for the latter.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
For many kinds of interventions, such as a new advertisement, marketing intervention, or feature recommendation, it is important to target a specific subset of people for maximizing its benefits at minimum cost or potential harm. However, a key challenge is that no data is available about the effect of such a prospective intervention since it has not been deployed yet. In this work, we propose a split-treatment analysis that ranks the individuals most likely to be positively affected by a prospective intervention using past observational data. Unlike standard causal inference methods, the split-treatment method does not need any observations of the target treatments themselves. Instead it relies on observations of a proxy treatment that is caused by the target treatment. Under reasonable assumptions, we show that the ranking of heterogeneous causal effects based on the proxy treatment is the same as the ranking based on the target treatment’s effect. In the absence of any interventional data for cross-validation, Split-Treatment uses sensitivity analyses for unobserved confounding to eliminate unreliable models. We apply Split-Treatment to simulated data and a large-scale, real-world targeting task and validate our discovered rankings via a randomized experiment for the latter. |
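Below is an illustrative sketch of ranking individuals by an estimated heterogeneous effect of a proxy treatment, in the spirit of the split-treatment analysis described above; a simple T-learner and synthetic data stand in for the paper's estimators and targeting task.

# Rank individuals by CATE of the observed *proxy* treatment; under the paper's
# assumptions this ranking matches the ranking under the undeployed target treatment.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.default_rng(1)
X = rng.normal(size=(5_000, 8))             # pre-treatment covariates (synthetic)
proxy_t = rng.integers(0, 2, 5_000)         # observed proxy treatment indicator
y = X[:, 0] + proxy_t * (1 + X[:, 1]) + rng.normal(size=5_000)  # outcome

# T-learner: separate outcome models for proxy-treated and proxy-untreated units.
m1 = GradientBoostingRegressor().fit(X[proxy_t == 1], y[proxy_t == 1])
m0 = GradientBoostingRegressor().fit(X[proxy_t == 0], y[proxy_t == 0])
cate_proxy = m1.predict(X) - m0.predict(X)

# Target the individuals with the highest estimated proxy-treatment effect.
top_k = np.argsort(-cate_proxy)[:500]
print("Highest-ranked individuals:", top_k[:10])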
Ruocheng Guo, Pengchuan Zhang, Hao Liu, Emre Kıcıman Out-of-distribution Prediction with Invariant Risk Minimization: The Limitation and An Effective Fix Miscellaneous arXiv, 2021. @misc{guo2021,
title = {Out-of-distribution Prediction with Invariant Risk Minimization: The Limitation and An Effective Fix},
author = {Ruocheng Guo and Pengchuan Zhang and Hao Liu and Emre Kıcıman},
url = {https://arxiv.org/abs/2101.07732},
year = {2021},
date = {2021-01-16},
abstract = {This work considers the out-of-distribution (OOD) prediction problem where (1) the training data are from multiple domains and (2) the test domain is unseen during training. DNNs fail in OOD prediction because they are prone to pick up spurious correlations. Recently, Invariant Risk Minimization (IRM) was proposed to address this issue. Its effectiveness has been demonstrated in the colored MNIST experiment. Nevertheless, we find that the performance of IRM can be dramatically degraded under strong Λ spuriousness, that is, when the spurious correlation between the spurious features and the class label is strong due to the strong causal influence of their common cause, the domain label, on both of them (see Fig. 1). In this work, we try to answer the questions: why does IRM fail in the aforementioned setting? Why does IRM work for the original colored MNIST dataset? How can we fix this problem of IRM? Then, we propose a simple and effective approach to fix the problem of IRM. We combine IRM with conditional distribution matching to avoid a specific type of spurious correlation under strong Λ spuriousness. Empirically, we design a series of semi-synthetic datasets, the colored MNIST plus, which exposes the problems of IRM and demonstrates the efficacy of the proposed method.},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
This work considers the out-of-distribution (OOD) prediction problem where (1) the training data are from multiple domains and (2) the test domain is unseen during training. DNNs fail in OOD prediction because they are prone to pick up spurious correlations. Recently, Invariant Risk Minimization (IRM) was proposed to address this issue. Its effectiveness has been demonstrated in the colored MNIST experiment. Nevertheless, we find that the performance of IRM can be dramatically degraded under strong Λ spuriousness, that is, when the spurious correlation between the spurious features and the class label is strong due to the strong causal influence of their common cause, the domain label, on both of them (see Fig. 1). In this work, we try to answer the questions: why does IRM fail in the aforementioned setting? Why does IRM work for the original colored MNIST dataset? How can we fix this problem of IRM? Then, we propose a simple and effective approach to fix the problem of IRM. We combine IRM with conditional distribution matching to avoid a specific type of spurious correlation under strong Λ spuriousness. Empirically, we design a series of semi-synthetic datasets, the colored MNIST plus, which exposes the problems of IRM and demonstrates the efficacy of the proposed method. |
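For background, below is a minimal sketch of the standard IRMv1 penalty that this work analyzes; the paper's proposed fix additionally matches conditional distributions across environments, which is not shown here, and the model and data are toy placeholders.

# Baseline IRMv1 objective: per-environment risk plus the squared gradient of the risk
# with respect to a dummy classifier scale, averaged over environments.
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 1))
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
bce = nn.functional.binary_cross_entropy_with_logits

# Two toy training environments standing in for domains with different spurious cues.
envs = [(torch.randn(256, 10), torch.randint(0, 2, (256, 1)).float()) for _ in range(2)]

def irm_penalty(logits, y):
    # Gradient of the risk w.r.t. a dummy scale of 1.0; its squared norm measures how
    # far the shared classifier is from being optimal for this environment.
    scale = torch.ones(1, requires_grad=True)
    loss = bce(logits * scale, y)
    (grad,) = torch.autograd.grad(loss, [scale], create_graph=True)
    return (grad ** 2).sum()

lam = 10.0
for _ in range(200):
    erm_losses, penalties = [], []
    for x, y in envs:
        logits = model(x)
        erm_losses.append(bce(logits, y))
        penalties.append(irm_penalty(logits, y))
    objective = torch.stack(erm_losses).mean() + lam * torch.stack(penalties).mean()
    opt.zero_grad()
    objective.backward()
    opt.step()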
2020
|
Yuqing Du, Stas Tiomkin, Emre Kıcıman, Daniel Polani, Pieter Abbeel, Anca Dragan AvE: Assistance via Empowerment Conference Proceedings of the Thirty-fourth Conference on Neural Information Processing Systems (NeurIPS), 2020. @conference{Du2020aveb,
title = {AvE: Assistance via Empowerment},
author = {Yuqing Du and Stas Tiomkin and Emre Kıcıman and Daniel Polani and Pieter Abbeel and Anca Dragan},
url = {https://kiciman.org/wp-content/uploads/2021/02/NeurIPS-2020-ave-assistance-via-empowerment-Paper.pdf},
year = {2020},
date = {2020-12-06},
booktitle = {Proceedings of the Thirty-fourth Conference on Neural Information Processing Systems (NeurIPS)},
abstract = {One difficulty in using artificial agents for human-assistive applications lies in the challenge of accurately assisting with a person’s goal(s). Existing methods tend to rely on inferring the human’s goal, which is challenging when there are many potential goals or when the set of candidate goals is difficult to identify. We propose a new paradigm for assistance by instead increasing the human’s ability to control their environment, and formalize this approach by augmenting reinforcement learning with human empowerment. This task-agnostic objective preserves the person’s autonomy and ability to achieve any eventual state. We test our approach against assistance based on goal inference, highlighting scenarios where our method overcomes failure modes stemming from goal ambiguity or misspecification. As existing methods for estimating empowerment in continuous domains are computationally hard, precluding their use in real-time learned assistance, we also propose an efficient empowerment-inspired proxy metric. Using this, we are able to successfully demonstrate our method in a shared autonomy user study for a challenging simulated teleoperation task with human-in-the-loop training.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
One difficulty in using artificial agents for human-assistive applications lies in the challenge of accurately assisting with a person’s goal(s). Existing methods tend to rely on inferring the human’s goal, which is challenging when there are many potential goals or when the set of candidate goals is difficult to identify. We propose a new paradigm for assistance by instead increasing the human’s ability to control their environment, and formalize this approach by augmenting reinforcement learning with human empowerment. This task-agnostic objective preserves the person’s autonomy and ability to achieve any eventual state. We test our approach against assistance based on goal inference, highlighting scenarios where our method overcomes failure modes stemming from goal ambiguity or misspecification. As existing methods for estimating empowerment in continuous domains are computationally hard, precluding their use in real-time learned assistance, we also propose an efficient empowerment-inspired proxy metric. Using this, we are able to successfully demonstrate our method in a shared autonomy user study for a challenging simulated teleoperation task with human-in-the-loop training. |
Amit Sharma, Emre Kıcıman DoWhy: An End-to-End library for causal inference Conference To be presented at Causal Data Science Meeting 2020., 2020, (https://causalscience.org/). @conference{Sharma2020DoWhy,
title = {DoWhy: An End-to-End library for causal inference},
author = {Amit Sharma and Emre Kıcıman},
year = {2020},
date = {2020-11-11},
booktitle = {To be presented at Causal Data Science Meeting 2020.},
note = {https://causalscience.org/},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
|
Amit Sharma, Emre Kıcıman DoWhy: An End-to-End Library for Causal Inference Miscellaneous 2020. @misc{dowhyarxiv,
title = {DoWhy: An End-to-End Library for Causal Inference},
author = {Amit Sharma and Emre Kıcıman},
url = {https://arxiv.org/abs/2011.04216},
year = {2020},
date = {2020-11-09},
abstract = {In addition to efficient statistical estimators of a treatment's effect, successful application of causal inference requires specifying assumptions about the mechanisms underlying observed data and testing whether they are valid, and to what extent. However, most libraries for causal inference focus only on the task of providing powerful statistical estimators. We describe DoWhy, an open-source Python library that is built with causal assumptions as its first-class citizens, based on the formal framework of causal graphs to specify and test causal assumptions. DoWhy presents an API for the four steps common to any causal analysis: 1) modeling the data using a causal graph and structural assumptions, 2) identifying whether the desired effect is estimable under the causal model, 3) estimating the effect using statistical estimators, and finally 4) refuting the obtained estimate through robustness checks and sensitivity analyses. In particular, DoWhy implements a number of robustness checks including placebo tests, bootstrap tests, and tests for unobserved confounding. DoWhy is an extensible library that supports interoperability with other implementations, such as EconML and CausalML for the estimation step. The library is available at https://github.com/microsoft/dowhy.},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
In addition to efficient statistical estimators of a treatment's effect, successful application of causal inference requires specifying assumptions about the mechanisms underlying observed data and testing whether they are valid, and to what extent. However, most libraries for causal inference focus only on the task of providing powerful statistical estimators. We describe DoWhy, an open-source Python library that is built with causal assumptions as its first-class citizens, based on the formal framework of causal graphs to specify and test causal assumptions. DoWhy presents an API for the four steps common to any causal analysis: 1) modeling the data using a causal graph and structural assumptions, 2) identifying whether the desired effect is estimable under the causal model, 3) estimating the effect using statistical estimators, and finally 4) refuting the obtained estimate through robustness checks and sensitivity analyses. In particular, DoWhy implements a number of robustness checks including placebo tests, bootstrap tests, and tests for unobserved confounding. DoWhy is an extensible library that supports interoperability with other implementations, such as EconML and CausalML for the estimation step. The library is available at https://github.com/microsoft/dowhy. |
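Below is an abbreviated sketch of the four-step workflow described above, using DoWhy's documented API on one of its bundled synthetic datasets; the particular estimator and refuter chosen here are only examples of the available methods.

# DoWhy's four steps on a synthetic dataset with a known effect and known graph.
import dowhy.datasets
from dowhy import CausalModel

data = dowhy.datasets.linear_dataset(beta=10, num_common_causes=5,
                                     num_samples=10_000, treatment_is_binary=True)

# 1) Model: encode assumptions as a causal graph.
model = CausalModel(data=data["df"], treatment=data["treatment_name"],
                    outcome=data["outcome_name"], graph=data["gml_graph"])

# 2) Identify: is the target effect estimable under the stated assumptions?
estimand = model.identify_effect(proceed_when_unidentifiable=True)

# 3) Estimate: apply a statistical estimator to the identified estimand.
estimate = model.estimate_effect(estimand,
                                 method_name="backdoor.propensity_score_stratification")

# 4) Refute: stress-test the estimate, e.g. with a placebo treatment.
refutation = model.refute_estimate(estimand, estimate,
                                   method_name="placebo_treatment_refuter")
print(estimate.value, refutation)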
Razieh Nabi, Joel Pfeiffer, Murat Ali Bayir, Denis Charles, Emre Kıcıman Causal Inference in the Presence of Interference in Sponsored Search Advertising Miscellaneous arXiv, 2020. @misc{nabi2020adinterference,
title = {Causal Inference in the Presence of Interference in Sponsored Search Advertising},
author = {Razieh Nabi and Joel Pfeiffer and Murat Ali Bayir and Denis Charles and Emre Kıcıman},
url = {https://arxiv.org/abs/2010.07458},
year = {2020},
date = {2020-10-14},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Ivan Evtimov, Weidong Cui, Ece Kamar, Emre Kıcıman, Tadayoshi Kohno, Jerry Li Security and Machine Learning in the Real World Miscellaneous arXiv, 2020. @misc{Evtimov2020SecurityArxiv,
title = {Security and Machine Learning in the Real World},
author = {Ivan Evtimov and Weidong Cui and Ece Kamar and Emre Kıcıman and Tadayoshi Kohno and Jerry Li},
url = {https://arxiv.org/abs/2007.07205},
year = {2020},
date = {2020-07-13},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Yuqing Du, Stas Tiomkin, Emre Kıcıman, Daniel Polani, Pieter Abbeel, Anca Dragan AvE: Assistance via Empowerment Miscellaneous arXiv, 2020. @misc{du2020avearxiv,
title = {AvE: Assistance via Empowerment},
author = {Yuqing Du and Stas Tiomkin and Emre Kıcıman and Daniel Polani and Pieter Abbeel and Anca Dragan},
url = {https://arxiv.org/abs/2006.14796},
year = {2020},
date = {2020-06-26},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Dana Nguyen, Alexandra Olteanu, Emre Kiciman External Information Sharing on Health Forums: An Exploration Conference Proceedings of Intl. Conf. on Web and Social Media, AAAI, 2020. @conference{nguyen2020External,
title = {External Information Sharing on Health Forums: An Exploration},
author = {Dana Nguyen and Alexandra Olteanu and Emre Kiciman},
year = {2020},
date = {2020-06-01},
booktitle = {Proceedings of Intl. Conf. on Web and Social Media},
publisher = {AAAI},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
|
Yuhang Song, Wenbo Li, Lei Zhang, Jianwei Yang, Emre Kıcıman, Hamid Palangi, Jianfeng Gao, C.-C. Jay Kuo, Pengchuan Zhang Novel Human-Object Interaction Detection via Adversarial Domain Generalization Miscellaneous arXiv, 2020. @misc{SongNovelHOIArxiv2020,
title = {Novel Human-Object Interaction Detection via Adversarial Domain Generalization},
author = {Yuhang Song and Wenbo Li and Lei Zhang and Jianwei Yang and Emre Kıcıman and Hamid Palangi and Jianfeng Gao and C.-C. Jay Kuo and Pengchuan Zhang},
year = {2020},
date = {2020-05-22},
howpublished = {arXiv},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2019
|
Shuxi Zeng, Emre Kıcıman, Denis Charles, Joel Pfeiffer, Murat Ali Bayir Causal Transfer Random Forest: Leveraging Observational and Randomization Studies Workshop NeurIPS 2019 Workshop, “Do the right thing”: machine learning and causal inference for improved decision making, 2019. @workshop{Zeng2019,
title = {Causal Transfer Random Forest: Leveraging Observational and Randomization Studies},
author = {Shuxi Zeng and Emre Kıcıman and Denis Charles and Joel Pfeiffer and Murat Ali Bayir},
year = {2019},
date = {2019-12-14},
booktitle = {NeurIPS 2019 Workshop, “Do the right thing”: machine learning and causal inference for improved decision making},
abstract = {It is often critical for prediction models to be robust to distributional shifts. Online advertisement platforms, for example, evaluate systems and policy changes using models that predict whether users will click on shown advertisements. Click prediction models built using conventional machine learning methods, however, become unreliable when the new systems or policy significantly shifts the feature distributions away from the available training data (usually large-scale observational data from the online system). In this paper, we describe a causal transfer random forest (CTRF) which combines existing training data with a small amount of data from randomized experiments to make robust predictions under distributional shifts. We learn the CTRF tree structure from randomized data, which breaks spurious correlations between input features and prediction targets, and then calibrate each node with both existing large-scale training data and randomized data. We evaluate the proposed method using data from radical exploration flights in an online ad platform and find that the CTRF outperforms other approaches.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
It is often critical for prediction models to be robust to distributional shifts. Online advertisement platforms, for example, evaluate systems and policy changes using models that predict whether users will click on shown advertisements. Click prediction models built using conventional machine learning methods, however, become unreliable when the new systems or policy significantly shifts the feature distributions away from the available training data (usually large-scale observational data from the online system). In this paper, we describe a causal transfer random forest (CTRF) which combines existing training data with a small amount of data from randomized experiments to make robust predictions under distributional shifts. We learn the CTRF tree structure from randomized data, which breaks spurious correlations between input features and prediction targets, and then calibrate each node with both existing large-scale training data and randomized data. We evaluate the proposed method using data from radical exploration flights in an online ad platform and find that the CTRF outperforms other approaches. |
Shuxi Zeng, Pengchuan Zhang, Denis Charles, Eren Manavoglu, Emre Kıcıman Robust Neural Networks for Causal Invariant Features Extraction Workshop NeurIPS 2019 Workshop, “Do the right thing”: machine learning and causal inference for improved decision making, 2019. @workshop{Zeng2019b,
title = {Robust Neural Networks for Causal Invariant Features Extraction},
author = {Shuxi Zeng and Pengchuan Zhang and Denis Charles and Eren Manavoglu and Emre Kıcıman},
year = {2019},
date = {2019-12-14},
booktitle = {NeurIPS 2019 Workshop, “Do the right thing”: machine learning and causal inference for improved decision making},
abstract = {Most machine learning approaches exploit correlational relationships in a training data set to predict a target variable. When these correlations are spurious or unreliable, this hampers the ability to generalize learned models to new environments. In contrast, models exploiting causal relationships between features and the outcome generalize better across environments. In this paper, we posit that these robust causal relationships can be identified by finding features that, when conditioned upon, render the outcome invariant across environments, that is, when the outcome is independent of the environment given a set of selected features with lower dimensions. We propose a neural network architecture for this task, comparing it with several existing approaches to exploit the causal invariant property, with a discussion on their motivations in a unified framework. Empirically, we perform a simulated experiment to demonstrate and compare the performance of the proposed method to the existing approaches. Finally, we measure its efficacy in a real-world data set for advertisement click prediction.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
Most machine learning approaches exploit correlational relationships in a training data set to predict a target variable. When these correlations are spurious or unreliable, this hampers the ability to generalize learned models to new environments. In contrast, models exploiting causal relationships between features and the outcome generalize better across environments. In this paper, we posit that these robust causal relationships can be identified by finding features that, when conditioned upon, render the outcome invariant across environments, that is, when the outcome is independent of the environment given a set of selected features with lower dimensions. We propose a neural network architecture for this task, comparing it with several existing approaches to exploit the causal invariant property, with a discussion on their motivations in a unified framework. Empirically, we perform a simulated experiment to demonstrate and compare the performance of the proposed method to the existing approaches. Finally, we measure its efficacy in a real-world data set for advertisement click prediction. |
Andi Peng, Besmira Nushi, Emre Kiciman, Kori Inkpen, Siddharth Suri, Ece Kamar What You See is What You Get? The Impact of Representation Criteria on Human Bias in Hiring Conference The Seventh AAAI Conference on Human Computation and Crowdsourcing, AAAI, 2019. @conference{Peng2019,
title = {What You See is What You Get? The Impact of Representation Criteria on Human Bias in Hiring},
author = {Andi Peng and Besmira Nushi and Emre Kiciman and Kori Inkpen and Siddharth Suri and Ece Kamar},
url = {https://kiciman.org/wp-content/uploads/2019/12/HCOMP_Hiring_Bias_Peng.pdf},
year = {2019},
date = {2019-10-28},
booktitle = {The Seventh AAAI Conference on Human Computation and Crowdsourcing},
publisher = {AAAI},
abstract = {Although systematic biases in decision-making are widely documented, the ways in which they emerge from different sources is less understood. We present a controlled experimental platform to study gender bias in hiring by decoupling the effect of world distribution (the gender breakdown of candidates in a specific profession) from bias in human decision-making. We explore the effectiveness of representation criteria, fixed proportional display of candidates, as an intervention strategy for mitigation of gender bias by conducting experiments measuring human decision-makers’ rankings for who they would recommend as potential hires. Experiments across professions with varying gender proportions show that balancing gender representation in candidate slates can correct biases for some professions where the world distribution is skewed, although doing so has no impact on other professions where human persistent preferences are at play. We show that the gender of the decision-maker, complexity of the decision-making task and over- and under-representation of genders in the candidate slate can all impact the final decision. By decoupling sources of bias, we can better isolate strategies for bias mitigation in human-in-the-loop systems.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Although systematic biases in decision-making are widely documented, the ways in which they emerge from different sources is less understood. We present a controlled experimental platform to study gender bias in hiring by decoupling the effect of world distribution (the gender breakdown of candidates in a specific profession) from bias in human decision-making. We explore the effectiveness of representation criteria, fixed proportional display of candidates, as an intervention strategy for mitigation of gender bias by conducting experiments measuring human decision-makers’ rankings for who they would recommend as potential hires. Experiments across professions with varying gender proportions show that balancing gender representation in candidate slates can correct biases for some professions where the world distribution is skewed, although doing so has no impact on other professions where human persistent preferences are at play. We show that the gender of the decision-maker, complexity of the decision-making task and over- and under-representation of genders in the candidate slate can all impact the final decision. By decoupling sources of bias, we can better isolate strategies for bias mitigation in human-in-the-loop systems. |
Alexandra Olteanu, Carlos Castillo, Fernando Diaz, Emre Kıcıman Social Data: Biases, Methodological Pitfalls, and Ethical Boundaries Journal Article In: Frontiers in Big Data, vol. 2, 2019, ISSN: 2624-909X. @article{SocialDataBiasb,
title = {Social Data: Biases, Methodological Pitfalls, and Ethical Boundaries},
author = {Alexandra Olteanu and Carlos Castillo and Fernando Diaz and Emre Kıcıman},
editor = {Juergen Pfeffer},
url = {https://www.frontiersin.org/article/10.3389/fdata.2019.00013
https://kiciman.org/wp-content/uploads/2019/07/Social-Data-Biases-Survey-Frontiers-Final.pdf},
doi = {10.3389/fdata.2019.00013},
issn = {2624-909X},
year = {2019},
date = {2019-07-11},
journal = {Frontiers in Big Data},
volume = {2},
abstract = {Social data in digital form, including user-generated content, expressed or implicit relations between people, and behavioral traces, are at the core of popular applications and platforms, driving the research agenda of many researchers. The promises of social data are many, including understanding “what the world thinks” about a social issue, brand, celebrity, or other entity, as well as enabling better decision-making in a variety of fields including public policy, healthcare, and economics. Many academics and practitioners have warned against the naive usage of social data. There are biases and inaccuracies occurring at the source of the data, but also introduced during processing. There are methodological limitations and pitfalls, as well as ethical boundaries and unexpected consequences that are often overlooked. This paper recognizes that the rigor with which these issues are addressed by different researchers varies across a wide range. We identify a variety of menaces in the practices around social data use, and organize them in a framework that helps to identify them.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Social data in digital form, including user-generated content, expressed or implicit relations between people, and behavioral traces, are at the core of popular applications and platforms, driving the research agenda of many researchers. The promises of social data are many, including understanding “what the world thinks” about a social issue, brand, celebrity, or other entity, as well as enabling better decision-making in a variety of fields including public policy, healthcare, and economics. Many academics and practitioners have warned against the naive usage of social data. There are biases and inaccuracies occurring at the source of the data, but also introduced during processing. There are methodological limitations and pitfalls, as well as ethical boundaries and unexpected consequences that are often overlooked. This paper recognizes that the rigor with which these issues are addressed by different researchers varies across a wide range. We identify a variety of menaces in the practices around social data use, and organize them in a framework that helps to identify them. |
Koustuv Saha, Benjamin Sugar, John Torous, Bruno Abrahao, Emre Kıcıman, Munmun De Choudhury A Social Media Study on the Effects of Psychiatric Medication Use Conference Proceedings of the International AAAI Conference on Web and Social Media (ICWSM 2019), AAAI, 2019. @conference{Saha2019,
title = {A Social Media Study on the Effects of Psychiatric Medication Use},
author = {Koustuv Saha and Benjamin Sugar and John Torous and Bruno Abrahao and Emre Kıcıman and Munmun De Choudhury},
url = {https://kiciman.org/wp-content/uploads/2019/07/ICWSM19_DrugEffects.pdf},
year = {2019},
date = {2019-06-11},
booktitle = {Proceedings of the International AAAI Conference on Web and Social Media (ICWSM 2019)},
publisher = {AAAI},
abstract = {Understanding the effects of psychiatric medications during mental health treatment constitutes an active area of inquiry. While clinical trials help evaluate the effects of these medications, many trials suffer from a lack of generalizability to broader populations. We leverage social media data to examine psychopathological effects subject to self-reported usage of psychiatric medication. Using a list of common approved and regulated psychiatric drugs and a Twitter dataset of 300M posts from 30K individuals, we develop machine learning models to first assess effects relating to mood, cognition, depression, anxiety, psychosis, and suicidal ideation. Then, based on a stratified propensity score based causal analysis, we observe that usage of specific drugs are associated with characteristic changes in an individual’s psychopathology. We situate these observations in the psychiatry literature, with a deeper analysis of pre-treatment cues that predict treatment outcomes. Our work bears potential to inspire novel clinical investigations and to build tools for digital therapeutics.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Understanding the effects of psychiatric medications during mental health treatment constitutes an active area of inquiry. While clinical trials help evaluate the effects of these medications, many trials suffer from a lack of generalizability to broader populations. We leverage social media data to examine psychopathological effects subject to self-reported usage of psychiatric medication. Using a list of common approved and regulated psychiatric drugs and a Twitter dataset of 300M posts from 30K individuals, we develop machine learning models to first assess effects relating to mood, cognition, depression, anxiety, psychosis, and suicidal ideation. Then, based on a stratified propensity score based causal analysis, we observe that usage of specific drugs is associated with characteristic changes in an individual’s psychopathology. We situate these observations in the psychiatry literature, with a deeper analysis of pre-treatment cues that predict treatment outcomes. Our work bears potential to inspire novel clinical investigations and to build tools for digital therapeutics. |
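A minimal sketch of a stratified propensity-score analysis like the one described in this abstract, on synthetic data (the variable names, model choices, and numbers are illustrative assumptions, not the authors' code or dataset): estimate each individual's probability of treatment from pre-treatment covariates, stratify on that score, and average the within-stratum outcome differences.

```python
# Hedged sketch: stratified propensity score analysis on synthetic data.
# `X` stands in for pre-treatment covariates, `t` for a binary self-reported
# medication-use indicator, and `y` for a psychopathology outcome score.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 5000
X = rng.normal(size=(n, 5))                      # pre-treatment covariates
t = rng.binomial(1, 1 / (1 + np.exp(-X[:, 0])))  # treatment depends on X[:, 0]
y = 0.5 * t + X[:, 0] + rng.normal(size=n)       # outcome with confounding

# 1. Estimate propensity scores P(t = 1 | X).
ps = LogisticRegression().fit(X, t).predict_proba(X)[:, 1]

# 2. Stratify units into propensity score quintiles.
edges = np.quantile(ps, np.linspace(0, 1, 6))[1:-1]
strata = np.digitize(ps, edges)

# 3. Average within-stratum treated-vs-control differences, weighted by stratum size.
effects, weights = [], []
for s in range(5):
    idx = strata == s
    treated, control = idx & (t == 1), idx & (t == 0)
    if treated.any() and control.any():
        effects.append(y[treated].mean() - y[control].mean())
        weights.append(idx.sum())

ate = np.average(effects, weights=weights)
print(f"Stratified ATE estimate: {ate:.2f} (true effect 0.5)")
```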
Saleema Amershi, Ece Kamar, Emre Kıcıman People and AI See Things Differently: Implications of Mismatched Perception on HCI for AI Systems Workshop Human-Centered Machine Learning Perspectives at CHI 2019, 2019. @workshop{Amershi2019,
title = {People and AI See Things Differently: Implications of Mismatched Perception on HCI for AI Systems},
author = {Saleema Amershi and Ece Kamar and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2019/07/People_and_AI-Mismatched_Perceptions.pdf},
year = {2019},
date = {2019-05-04},
booktitle = {Human-Centered Machine Learning Perspectives at CHI 2019},
abstract = {People and AI are increasingly interacting and collaborating in the context of critical application domains (e.g., healthcare, finance, transportation, and legal systems). There is often, however, a fundamental mismatch between how humans and machines perceive and reason about the world. This offers opportunities for bringing together multiple perspectives to reach better outcomes. On the other hand, this mismatch can hurt coordination and result in serious failures (e.g., semi-autonomous vehicle accidents and misdiagnoses by clinical decision support systems). We believe a key solution is to ground communications between humans and machines in their common perceptions while allowing people to inspect and verify the AI and appropriately intervene when necessary. Achieving this requires the HCI and AI communities to address several challenges and to co-design HCI-AI patterns that enable verifiability, control, and consistency.},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
People and AI are increasingly interacting and collaborating in the context of critical application domains (e.g., healthcare, finance, transportation, and legal systems). There is often, however, a fundamental mismatch between how humans and machines perceive and reason about the world. This offers opportunities for bringing together multiple perspectives to reach better outcomes. On the other hand, this mismatch can hurt coordination and result in serious failures (e.g., semi-autonomous vehicle accidents and misdiagnoses by clinical decision support systems). We believe a key solution is to ground communications between humans and machines in their common perceptions while allowing people to inspect and verify the AI and appropriately intervene when necessary. Achieving this requires the HCI and AI communities to address several challenges and to co-design HCI-AI patterns that enable verifiability, control, and consistency. |
Maggie Makar, Adith Swaminathan, Emre Kıcıman A Distillation Approach to Data Efficient Individual Treatment Effect Estimation Proceedings Article In: Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence (AAAI-19) , 2019. @inproceedings{Makar2019,
title = {A Distillation Approach to Data Efficient Individual Treatment Effect Estimation},
author = {Maggie Makar and Adith Swaminathan and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2019/03/deitee-camera-ready.pdf},
year = {2019},
date = {2019-01-27},
booktitle = {Proceedings of the Thirty-Third AAAI Conference on Artificial Intelligence (AAAI-19) },
abstract = {The potential for using machine learning algorithms as a tool for suggesting optimal interventions has fueled significant interest in developing methods for estimating heterogeneous or individual treatment effects (ITEs) from observational data. While several methods for estimating ITEs have been recently suggested, these methods assume no constraints on the availability of data at the time of deployment or test time. This assumption is unrealistic in settings where data acquisition is a significant part of the analysis pipeline, meaning data about a test case has to be collected in order to predict the ITE. In this work, we present Data Efficient Individual Treatment Effect Estimation (DEITEE), a method that exploits the idea that adjusting for confounding, and hence collecting information about confounders, is not necessary at test time. DEITEE allows the development of rich models that exploit all variables at train time but identifies a minimal set of variables required to estimate the ITE at test time. Using 77 semi-synthetic datasets with varying data generating processes, we show that DEITEE achieves significant reductions in the number of variables required at test time without sacrificing accuracy. Using real data, we demonstrate the utility of our approach in helping soon-to-be mothers make planning and lifestyle decisions that will impact newborn health},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
The potential for using machine learning algorithms as a tool for suggesting optimal interventions has fueled significant interest in developing methods for estimating heterogeneous or individual treatment effects (ITEs) from observational data. While several methods for estimating ITEs have been recently suggested, these methods assume no constraints on the availability of data at the time of deployment or test time. This assumption is unrealistic in settings where data acquisition is a significant part of the analysis pipeline, meaning data about a test case has to be collected in order to predict the ITE. In this work, we present Data Efficient Individual Treatment Effect Estimation (DEITEE), a method that exploits the idea that adjusting for confounding, and hence collecting information about confounders, is not necessary at test time. DEITEE allows the development of rich models that exploit all variables at train time but identifies a minimal set of variables required to estimate the ITE at test time. Using 77 semi-synthetic datasets with varying data generating processes, we show that DEITEE achieves significant reductions in the number of variables required at test time without sacrificing accuracy. Using real data, we demonstrate the utility of our approach in helping soon-to-be mothers make planning and lifestyle decisions that will impact newborn health |
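The distillation idea can be sketched under simplifying assumptions (a T-learner teacher and a lasso student over synthetic data; the paper's actual estimator and datasets differ): fit a rich effect model on all covariates at train time, then fit a sparse surrogate to its effect estimates so that only a small set of variables must be collected at test time.

```python
# Hedged sketch of distilling an individual-treatment-effect model into a
# sparse test-time model. All data and model choices are illustrative.
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso

rng = np.random.default_rng(1)
n, d = 4000, 20
X = rng.normal(size=(n, d))
t = rng.binomial(1, 0.5, size=n)
tau = 1.0 + 0.8 * X[:, 0]                       # heterogeneous treatment effect
y = (X @ rng.normal(size=d)) * 0.3 + tau * t + rng.normal(size=n)

# Teacher: one outcome model per treatment arm, using all covariates.
m1 = GradientBoostingRegressor().fit(X[t == 1], y[t == 1])
m0 = GradientBoostingRegressor().fit(X[t == 0], y[t == 0])
ite_teacher = m1.predict(X) - m0.predict(X)

# Student: sparse model distilled from the teacher's ITE estimates; variables
# with nonzero coefficients are the ones needed at test time.
student = Lasso(alpha=0.05).fit(X, ite_teacher)
needed = np.flatnonzero(student.coef_)
print("Variables required at test time:", needed)
```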
2018
|
E. Leypunskiy, E. Kıcıman, M. Shah, O. J. Walch, A. Rzhetsky, A. Dinner, M. J. Rust Geographically Resolved Rhythms in Twitter Use Reveal Social Pressures on Daily Activity Patterns Journal Article In: Current Biology, 2018. @article{leypunskiy2019,
title = {Geographically Resolved Rhythms in Twitter Use Reveal Social Pressures on Daily Activity Patterns},
author = {E. Leypunskiy and E. Kıcıman and M. Shah and O. J. Walch and A. Rzhetsky and A. Dinner and M. J. Rust},
url = {https://www.cell.com/current-biology/fulltext/S0960-9822(18)31345-9},
doi = {https://doi.org/10.1016/j.cub.2018.10.016},
year = {2018},
date = {2018-11-15},
journal = {Current Biology},
abstract = {Daily rhythms in human physiology and behavior are driven by the interplay of circadian rhythms, environmental cycles, and social schedules. Much research has focused on the mechanism and function of circadian rhythms in constant conditions or in idealized light-dark environments. There have been comparatively few studies into how social pressures, such as work and school schedules, affect human activity rhythms day to day and season to season. To address this issue, we analyzed activity on Twitter in >1,500 US counties throughout the 2012–2013 calendar years in 15-min intervals using geographically tagged tweets representing ≈0.1% of the total population each day. We find that sustained periods of low Twitter activity are correlated with sufficient sleep as measured by conventional surveys. We show that this nighttime lull in Twitter activity is shifted to later times on weekends relative to weekdays, a phenomenon we term “Twitter social jet lag.” The magnitude of this social jet lag varies seasonally and geographically—with the West Coast experiencing less Twitter social jet lag compared to the Central and Eastern US—and is correlated with average commuting schedules and disease risk factors such as obesity. Most counties experience the largest amount of Twitter social jet lag in February and the lowest in June or July. We present evidence that these shifts in weekday activity coincide with relaxed social pressures due to local K-12 school holidays and that the direct seasonal effect of altered day length is comparatively weaker.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Daily rhythms in human physiology and behavior are driven by the interplay of circadian rhythms, environmental cycles, and social schedules. Much research has focused on the mechanism and function of circadian rhythms in constant conditions or in idealized light-dark environments. There have been comparatively few studies into how social pressures, such as work and school schedules, affect human activity rhythms day to day and season to season. To address this issue, we analyzed activity on Twitter in >1,500 US counties throughout the 2012–2013 calendar years in 15-min intervals using geographically tagged tweets representing ≈0.1% of the total population each day. We find that sustained periods of low Twitter activity are correlated with sufficient sleep as measured by conventional surveys. We show that this nighttime lull in Twitter activity is shifted to later times on weekends relative to weekdays, a phenomenon we term “Twitter social jet lag.” The magnitude of this social jet lag varies seasonally and geographically—with the West Coast experiencing less Twitter social jet lag compared to the Central and Eastern US—and is correlated with average commuting schedules and disease risk factors such as obesity. Most counties experience the largest amount of Twitter social jet lag in February and the lowest in June or July. We present evidence that these shifts in weekday activity coincide with relaxed social pressures due to local K-12 school holidays and that the direct seasonal effect of altered day length is comparatively weaker. |
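The weekday-versus-weekend shift of the nighttime activity lull ("Twitter social jet lag") can be sketched on synthetic data as follows (hypothetical activity series and simplified logic, not the authors' pipeline):

```python
# Hedged sketch: locate the deepest point of the nighttime activity lull
# separately for weekdays and weekends, and report how much later it falls
# on weekends. The activity series below is synthetic.
import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
ts = pd.date_range("2012-01-01", "2012-03-31 23:45", freq="15min")
slot = np.asarray(ts.hour + ts.minute / 60, dtype=float)   # time of day in hours
center = np.where(ts.dayofweek < 5, 3.0, 4.5)              # lull center: weekday vs. weekend
dist = ((slot - center + 12) % 24) - 12                    # circular distance to lull center
activity = 100 - 60 * np.exp(-dist ** 2 / 8) + rng.normal(0, 3, len(ts))

df = pd.DataFrame({"activity": activity, "slot": slot, "weekend": ts.dayofweek >= 5})

def lull_time(frame):
    """Time of day (in hours) with the lowest mean activity."""
    return frame.groupby("slot")["activity"].mean().idxmin()

jetlag = (lull_time(df[df.weekend]) - lull_time(df[~df.weekend])) % 24
print(f"Twitter social jet lag: about {jetlag * 60:.0f} minutes")  # ~90 on this data
```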
Munmun De Choudhury, Emre Kıcıman Integrating Artificial and Human Intelligence in Complex, Sensitive Problem Domains: Experiences from Mental Health Journal Article In: AI Magazine, vol. 39, no. 3, 2018. @article{Choudhury2018,
title = {Integrating Artificial and Human Intelligence in Complex, Sensitive Problem Domains: Experiences from Mental Health},
author = {Munmun De Choudhury and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2018/10/AIMag_IntegratingAIandHumanIntelligence_Fall2018.pdf},
year = {2018},
date = {2018-09-01},
journal = {AI Magazine},
volume = {39},
number = {3},
abstract = {This article presents a position highlighting the importance of combining artificial intelligence approaches with human intelligence, in other words, the involvement of humans. To do so, we specifically focus on problems of societal significance, stemming from complex, sensitive domains. We first discuss our prior work across a series of projects surrounding social media and mental health, and identify major themes for which augmentation of AI systems and techniques with human feedback has been and can be fruitful and meaningful. We then conclude by noting the implications, in terms of opportunities as well as challenges, that can be drawn from our position, both relating to the specific domain of mental health and for AI researchers and practitioners.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
This article presents a position highlighting the importance of combining artificial intelligence approaches with human intelligence, in other words, the involvement of humans. To do so, we specifically focus on problems of societal significance, stemming from complex, sensitive domains. We first discuss our prior work across a series of projects surrounding social media and mental health, and identify major themes for which augmentation of AI systems and techniques with human feedback has been and can be fruitful and meaningful. We then conclude by noting the implications, in terms of opportunities as well as challenges, that can be drawn from our position, both relating to the specific domain of mental health and for AI researchers and practitioners. |
Emre Kıcıman, Jorgen Thelin Answering What If, Should I, and Other Expectation Exploration Queries Using Causal Inference over Longitudinal Data Conference Proceedings of 1st Biennial Conference on Design of Experimental Search and Information Retrieval Systems (DESIRES 2018), 2018. @conference{kiciman-answering-2018,
title = {Answering What If, Should I, and Other Expectation Exploration Queries Using Causal Inference over Longitudinal Data},
author = {Emre Kıcıman and Jorgen Thelin},
url = {https://kiciman.org/wp-content/uploads/2018/09/msr_outcomes_service_desires_2018_cameraready.pdf},
year = {2018},
date = {2018-08-28},
booktitle = {Proceedings of 1st Biennial Conference on Design of Experimental Search and Information Retrieval Systems (DESIRES 2018)},
abstract = {Many people use web search engines for expectation exploration: exploring what might happen if they take some action, or how they should expect some situation to evolve. While search engines have databases to provide structured answers to many questions, there is no database about the outcomes of actions or the evolution of situations. The information we need to answer such questions, however, is already being recorded. On social media, for example, hundreds of millions of people are publicly reporting about the actions they take and the situations they are in, and an increasing range of events and activities experienced in their lives over time. Here, we show how causal inference methods can be applied to such data to generate answers for expectation exploration queries. This paper describes a system implementation for running ad-hoc online causal inference analyses. The analysis results can be used to generate pros/cons lists for decision support, timeline representations to show how situations evolve, and be embedded in many other decision support and planning applications. We discuss potential methods for evaluating the fundamental quality of inference results and judge the short-term and long-term usefulness of information for users.
},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Many people use web search engines for expectation exploration: exploring what might happen if they take some action, or how they should expect some situation to evolve. While search engines have databases to provide structured answers to many questions, there is no database about the outcomes of actions or the evolution of situations. The information we need to answer such questions, however, is already being recorded. On social media, for example, hundreds of millions of people are publicly reporting about the actions they take and the situations they are in, and an increasing range of events and activities experienced in their lives over time. Here, we show how causal inference methods can be applied to such data to generate answers for expectation exploration queries. This paper describes a system implementation for running ad-hoc online causal inference analyses. The analysis results can be used to generate pros/cons lists for decision support, timeline representations to show how situations evolve, and be embedded in many other decision support and planning applications. We discuss potential methods for evaluating the fundamental quality of inference results and judge the short-term and long-term usefulness of information for users.
|
Emre Kıcıman Causal Inference over Longitudinal Data to Support Expectation Exploration Conference Extended Abstract: SIGIR Industry Day Invited Talk, 2018. @conference{kiciman_sigirindustry,
title = {Causal Inference over Longitudinal Data to Support Expectation Exploration },
author = {Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2018/07/sigir18_kiciman_industry_invitedtalk.pdf
https://kiciman.org/wp-content/uploads/2018/05/main.pdf},
year = {2018},
date = {2018-07-10},
booktitle = {Extended Abstract: SIGIR Industry Day Invited Talk},
abstract = {Many people use web search engines for expectation exploration: exploring what might happen if they take some action, or how they should expect some situation to evolve. While search engines have databases to provide structured answers to many questions, there is no database about the outcomes of actions or the evolution of situations. The information we need to answer such questions, however, is already being recorded. On social media, for example, hundreds of millions of people are publicly reporting about the actions they take and the situations they are in, and an increasing range of events and activities experienced in their lives over time. In this presentation, we show how causal inference methods can be applied to such individual-level, longitudinal records to generate answers for expectation exploration queries. },
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Many people use web search engines for expectation exploration: exploring what might happen if they take some action, or how they should expect some situation to evolve. While search engines have databases to provide structured answers to many questions, there is no database about the outcomes of actions or the evolution of situations. The information we need to answer such questions, however, is already being recorded. On social media, for example, hundreds of millions of people are publicly reporting about the actions they take and the situations they are in, and an increasing range of events and activities experienced in their lives over time. In this presentation, we show how causal inference methods can be applied to such individual-level, longitudinal records to generate answers for expectation exploration queries. |
Emre Kıcıman, Scott Counts, Melissa Gasser Using Longitudinal Social Media Analysis to Understand the Effects of Early College Alcohol Use Proceedings Article In: Intl. Conf. on Web and Social Media (ICWSM-18), AAAI, 2018. @inproceedings{Kiciman2018,
title = {Using Longitudinal Social Media Analysis to Understand the Effects of Early College Alcohol Use},
author = {Emre Kıcıman and Scott Counts and Melissa Gasser},
url = {https://kiciman.org/wp-content/uploads/2018/10/college_alcohol_tweets_icwsm18e.pdf},
year = {2018},
date = {2018-06-25},
booktitle = {Intl. Conf. on Web and Social Media (ICWSM-18)},
publisher = {AAAI},
abstract = {While college completion is predictive of individual career happiness and economic achievement, many factors, such as excessive alcohol usage, jeopardize college success. In this paper, we propose a method for analyzing large-scale, longitudinal social media timelines to provide fine-grained visibility into how the behaviors and trajectories of alcohol-mentioning students differ from their peers. Using propensity score stratification to reduce bias from confounding factors, we analyze the Twitter data of 63k college students over 5 years to study the effect of early alcohol usage on topics linked to college success. We find multi-year effects, including lower mentions of study habits, increased mentions of potentially risky behaviors, and decreases in mentions of positive emotions. We conclude with a discussion of social media data's role in the study of the risky behaviors of college students and other individual behaviors with long-term effects.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
While college completion is predictive of individual career happiness and economic achievement, many factors, such as excessive alcohol usage, jeopardize college success. In this paper, we propose a method for analyzing large-scale, longitudinal social media timelines to provide fine-grained visibility into how the behaviors and trajectories of alcohol-mentioning students differ from their peers. Using propensity score stratification to reduce bias from confounding factors, we analyze the Twitter data of 63k college students over 5 years to study the effect of early alcohol usage on topics linked to college success. We find multi-year effects, including lower mentions of study habits, increased mentions of potentially risky behaviors, and decreases in mentions of positive emotions. We conclude with a discussion of social media data's role in the study of the risky behaviors of college students and other individual behaviors with long-term effects. |
Munmun De Choudhury, Emre Kıcıman Integrating Online and Offline Data in Complex, Sensitive Problem Domains: Experiences from Mental Health Workshop ICWSM Workshop on Social Media and Health: A Focus on Methods for Linking Online and Offline Data, 2018. @workshop{offlineonline_mentalhealth,
title = {Integrating Online and Offline Data in Complex, Sensitive Problem Domains: Experiences from Mental Health},
author = {Munmun De Choudhury and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2018/05/ICWSM18_Offline_Online-mental_health.pdf},
year = {2018},
date = {2018-06-25},
booktitle = {ICWSM Workshop on Social Media and Health: A Focus on Methods for Linking Online and Offline Data},
abstract = {A growing body of research in the ICWSM community and beyond has employed large-scale, unobtrusively gathered online data, primarily from social media sites, to model, understand, and rethink improving health and well-being. This short paper highlights the prior work of the authors in augmenting such online data driven approaches with offline information. To do so, the authors first present some of the challenges in utilizing online data alone in problems relating to the health domain. Then, we present three themes about how offline information may be harnessed, ranging from its use as a source of data, to obtaining theoretical explanations of computational models, and to improving the outcomes of online-data only models. Thereafter, we highlight some lessons learned from our work in doing so, in the domain of mental health. The paper concludes by situating offline information as an important resource that is critical to large-scale studies of health and well-being.
},
keywords = {},
pubstate = {published},
tppubtype = {workshop}
}
A growing body of research in the ICWSM community and beyond has employed large-scale, unobtrusively gathered online data, primarily from social media sites, to model, understand, and rethink improving health and well-being. This short paper highlights the prior work of the authors in augmenting such online data driven approaches with offline information. To do so, the authors first present some of the challenges in utilizing online data alone in problems relating to the health domain. Then, we present three themes about how offline information may be harnessed, ranging from its use as a source of data, to obtaining theoretical explanations of computational models, and to improving the outcomes of online-data only models. Thereafter, we highlight some lessons learned from our work in doing so, in the domain of mental health. The paper concludes by situating offline information as an important resource that is critical to large-scale studies of health and well-being.
|
Mehrdad Farajtabar, Emre Kıcıman, Girish Nathan, Ryen White Modeling Behaviors and Lifestyle with Online and Social Data for Predicting and Analyzing Sleep and Exercise Quality Journal Article In: International Journal of Data Science and Analytics, vol. 5, no. 4, 2018. @article{farajtabar-modeling2018,
title = {Modeling Behaviors and Lifestyle with Online and Social Data for Predicting and Analyzing Sleep and Exercise Quality},
author = {Mehrdad Farajtabar and Emre Kıcıman and Girish Nathan and Ryen White},
url = {https://rdcu.be/YMfd},
doi = {https://doi.org/10.1007/s41060-018-0136-8},
year = {2018},
date = {2018-06-16},
journal = {International Journal of Data Science and Analytics},
volume = {5},
number = {4},
abstract = {While recent data studies have focused on associations between sleep and exercise patterns as captured by digital fitness devices, it is known that sleep and exercise quality are affected by a much broader set of factors not captured by these devices, such as general lifestyle, eating, and stress. Here, we conduct a large-scale data study of exercise and sleep effects through an analysis of eight months of exercise and sleep data for 20k users, combined with search query logs, location information and aggregated social media data. We analyze factors correlated with better sleep and more effective exercise, and confirm these relationships through causal inference analysis. Further, we build linear models to predict individuals' sleep and exercise quality. This analysis demonstrates the potential benefits of combining online and social data sources with data from health trackers, and is a potentially rich computational benchmark for health studies. We discuss the implications of our work for individuals, health practitioners and health systems.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
While recent data studies have focused on associations between sleep and exercise patterns as captured by digital fitness devices, it is known that sleep and exercise quality are affected by a much broader set of factors not captured by these devices, such as general lifestyle, eating, and stress. Here, we conduct a large-scale data study of exercise and sleep effects through an analysis of eight months of exercise and sleep data for 20k users, combined with search query logs, location information and aggregated social media data. We analyze factors correlated with better sleep and more effective exercise, and confirm these relationships through causal inference analysis. Further, we build linear models to predict individuals' sleep and exercise quality. This analysis demonstrates the potential benefits of combining online and social data sources with data from health trackers, and is a potentially rich computational benchmark for health studies. We discuss the implications of our work for individuals, health practitioners and health systems. |
Ishanu Chattopadhyay, Emre Kıcıman, Joshua W Elliott, Jeffrey L Shaman, Andrey Rzhetsky Conjunction of factors triggering waves of seasonal influenza Journal Article In: eLife, vol. 7, pp. e30756, 2018. @article{10.7554/eLife.30756,
title = {Conjunction of factors triggering waves of seasonal influenza},
author = {Ishanu Chattopadhyay and Emre Kıcıman and Joshua W Elliott and Jeffrey L Shaman and Andrey Rzhetsky},
editor = {Mark Jit},
url = {https://doi.org/10.7554/eLife.30756},
doi = {10.7554/eLife.30756},
year = {2018},
date = {2018-02-27},
journal = {eLife},
volume = {7},
pages = {e30756},
abstract = {Using several longitudinal datasets describing putative factors affecting influenza incidence and clinical data on the disease and health status of over 150 million human subjects observed over a decade, we investigated the source and the mechanistic triggers of influenza epidemics. We conclude that the initiation of a pan-continental influenza wave emerges from the simultaneous realization of a complex set of conditions. The strongest predictor groups are as follows, ranked by importance: (1) the host population’s socio- and ethno-demographic properties; (2) weather variables pertaining to specific humidity, temperature, and solar radiation; (3) the virus’ antigenic drift over time; (4) the host population’s land-based travel habits, and; (5) recent spatio-temporal dynamics, as reflected in the influenza wave auto-correlation. The models we infer are demonstrably predictive (area under the Receiver Operating Characteristic curve 80%) when tested with out-of-sample data, opening the door to the potential formulation of new population-level intervention and mitigation policies.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Using several longitudinal datasets describing putative factors affecting influenza incidence and clinical data on the disease and health status of over 150 million human subjects observed over a decade, we investigated the source and the mechanistic triggers of influenza epidemics. We conclude that the initiation of a pan-continental influenza wave emerges from the simultaneous realization of a complex set of conditions. The strongest predictor groups are as follows, ranked by importance: (1) the host population’s socio- and ethno-demographic properties; (2) weather variables pertaining to specific humidity, temperature, and solar radiation; (3) the virus’ antigenic drift over time; (4) the host population’s land-based travel habits, and; (5) recent spatio-temporal dynamics, as reflected in the influenza wave auto-correlation. The models we infer are demonstrably predictive (area under the Receiver Operating Characteristic curve 80%) when tested with out-of-sample data, opening the door to the potential formulation of new population-level intervention and mitigation policies. |
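The out-of-sample evaluation quoted above (area under the ROC curve near 80%) follows the standard pattern sketched below; the features, labels, and model here are synthetic stand-ins rather than the study's actual predictors.

```python
# Hedged sketch of out-of-sample ROC AUC evaluation for a wave-initiation
# classifier. Features are placeholders for demographic/weather/travel factors.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(3)
n = 8000
X = rng.normal(size=(n, 10))                       # stand-in predictor groups
logit = 1.5 * X[:, 0] + 1.0 * X[:, 1] - 0.5 * X[:, 2]
y = rng.binomial(1, 1 / (1 + np.exp(-logit)))      # wave-initiation indicator

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.25, random_state=0)
model = RandomForestClassifier(n_estimators=200, random_state=0).fit(X_tr, y_tr)
auc = roc_auc_score(y_te, model.predict_proba(X_te)[:, 1])
print(f"Out-of-sample ROC AUC: {auc:.2f}")
```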
2017
|
Tanya Y. Berger-Wolf, Daniel I. Rubenstein, Charles V. Stewart, Jason A. Holmberg, Jason Parham, Jonathan Crall, Sreejith Menon, Jon Van Oast, Emre Kıcıman, Lucas Joppa Wildbook: Crowdsourcing, computer vision, and data science for conservation Proceedings Article In: Bloomberg Data For Good Exchange, 2017. @inproceedings{bergerwolf2017wildbook,
title = {Wildbook: Crowdsourcing, computer vision, and data science for conservation},
author = {Tanya Y. Berger-Wolf and Daniel I. Rubenstein and Charles V. Stewart and Jason A. Holmberg and Jason Parham and Jonathan Crall and Sreejith Menon and Jon Van Oast and Emre Kıcıman and Lucas Joppa},
year = {2017},
date = {2017-09-24},
booktitle = {Bloomberg Data For Good Exchange},
abstract = {Photographs, taken by field scientists, tourists, automated cameras, and incidental photographers, are the most abundant source of data on wildlife today. Wildbook is an autonomous computational system that starts from massive collections of images and, by detecting various species of animals and identifying individuals, combined with sophisticated data management, turns them into a high resolution information database, enabling scientific inquiry, conservation, and citizen science.
We have built Wildbooks for whales (flukebook.org), sharks (whaleshark.org), two species of zebras (Grevy's and plains), and several others. In January 2016, Wildbook enabled the first ever full species (the endangered Grevy's zebra) census using photographs taken by ordinary citizens in Kenya. The resulting numbers are now the official species census used by the IUCN Red List: http://www.iucnredlist.org/details/7950/0. In 2016, Wildbook partnered up with WWF to build Wildbook for Sea Turtles, Internet of Turtles (IoT), as well as systems for seals and lynx. Most recently, we have demonstrated that we can now use publicly available social media images to count and track wild animals. In this paper, we present and discuss both the impact and challenges that the use of crowdsourced images can have on wildlife conservation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Photographs, taken by field scientists, tourists, automated cameras, and incidental photographers, are the most abundant source of data on wildlife today. Wildbook is an autonomous computational system that starts from massive collections of images and, by detecting various species of animals and identifying individuals, combined with sophisticated data management, turns them into a high resolution information database, enabling scientific inquiry, conservation, and citizen science.
We have built Wildbooks for whales (flukebook.org), sharks (whaleshark.org), two species of zebras (Grevy's and plains), and several others. In January 2016, Wildbook enabled the first ever full species (the endangered Grevy's zebra) census using photographs taken by ordinary citizens in Kenya. The resulting numbers are now the official species census used by the IUCN Red List: http://www.iucnredlist.org/details/7950/0. In 2016, Wildbook partnered up with WWF to build Wildbook for Sea Turtles, Internet of Turtles (IoT), as well as systems for seals and lynx. Most recently, we have demonstrated that we can now use publicly available social media images to count and track wild animals. In this paper, we present and discuss both the impact and challenges that the use of crowdsourced images can have on wildlife conservation. |
Ishanu Chattopadhyay, Emre Kıcıman, Joshua Elliott, Jeffrey Shaman, Andrey Rzhetsky Conjunction of Factors Triggering Waves of Seasonal Influenza Online Biorxiv 2017. @online{chattopadhyay2017conjunction,
title = {Conjunction of Factors Triggering Waves of Seasonal Influenza},
author = {Ishanu Chattopadhyay and Emre Kıcıman and Joshua Elliott and Jeffrey Shaman and Andrey Rzhetsky},
url = {https://kiciman.org/wp-content/uploads/2017/08/factorstriggeringflu_biorxiv.pdf},
doi = {10.1101/168476},
year = {2017},
date = {2017-07-27},
journal = {bioRxiv},
pages = {168476},
publisher = {Cold Spring Harbor Labs Journals},
organization = {Biorxiv},
abstract = {Understanding the subtle confluence of factors triggering pan-continental, seasonal epidemics of influenza-like illness is an extremely important problem, with the potential to save tens of thousands of lives and billions of dollars every year in the US alone. Beginning with several large, longitudinal datasets on putative factors and clinical data on the disease and health status of over 150 million human subjects observed over a decade, we investigated the source and the mechanistic triggers of epidemics. Our analysis included insurance claims for a significant cross-section of the US population in the past decade, human movement patterns inferred from billions of tweets, whole-US weekly weather data covering the same time span as the medical records, data on vaccination coverage over the same period, and sequence variations of key viral proteins. We also explicitly accounted for the spatio-temporal auto-correlations of infectious waves, and a host of socioeconomic and demographic factors. We carried out multiple orthogonal statistical analyses on these diverse, large geo-temporal datasets to bolster and corroborate our findings. We conclude that the initiation of a pan-continental influenza wave emerges from the simultaneous realization of a complex set of conditions, the strongest predictor groups are as follows, ranked by importance: (1) the host population’s socio- and ethno-demographic properties; (2) weather variables pertaining to relevant area specific humidity, temperature, and solar radiation; (3) the virus’ antigenic drift over time; (4) the host population’s land-based travel habits, and; (5) the spatio-temporal dynamics’ immediate history, as reflected in the influenza wave autocorrelation. The models we infer are demonstrably predictive (area under the Receiver Operating Characteristic curve ≈ 80%) when tested with out-of-sample data, opening the door to the potential formulation of new population-level intervention and mitigation policies.},
keywords = {},
pubstate = {published},
tppubtype = {online}
}
Understanding the subtle confluence of factors triggering pan-continental, seasonal epidemics of influenza-like illness is an extremely important problem, with the potential to save tens of thousands of lives and billions of dollars every year in the US alone. Beginning with several large, longitudinal datasets on putative factors and clinical data on the disease and health status of over 150 million human subjects observed over a decade, we investigated the source and the mechanistic triggers of epidemics. Our analysis included insurance claims for a significant cross-section of the US population in the past decade, human movement patterns inferred from billions of tweets, whole-US weekly weather data covering the same time span as the medical records, data on vaccination coverage over the same period, and sequence variations of key viral proteins. We also explicitly accounted for the spatio-temporal auto-correlations of infectious waves, and a host of socioeconomic and demographic factors. We carried out multiple orthogonal statistical analyses on these diverse, large geo-temporal datasets to bolster and corroborate our findings. We conclude that the initiation of a pan-continental influenza wave emerges from the simultaneous realization of a complex set of conditions, the strongest predictor groups are as follows, ranked by importance: (1) the host population’s socio- and ethno-demographic properties; (2) weather variables pertaining to relevant area specific humidity, temperature, and solar radiation; (3) the virus’ antigenic drift over time; (4) the host population’s land-based travel habits, and; (5) the spatio-temporal dynamics’ immediate history, as reflected in the influenza wave autocorrelation. The models we infer are demonstrably predictive (area under the Receiver Operating Characteristic curve ≈ 80%) when tested with out-of-sample data, opening the door to the potential formulation of new population-level intervention and mitigation policies. |
Sreejith Menon, Tanya Berger-Wolf, Emre Kıcıman, Lucas Joppa, Charles V. Stewart, Jason Parham, Jonathan Crall, Jason Holmberg, Jonathan Van Oast Animal Population Estimation Using Flickr Images Proceedings Article In: Proceedings of the 2nd International Workshop on the Social Web for Environmental and Ecological Monitoring (SWEEM 2017), ACM 2017. @inproceedings{menon2017animal,
title = {Animal Population Estimation Using Flickr Images},
author = {Sreejith Menon and Tanya Berger-Wolf and Emre Kıcıman and Lucas Joppa and Charles V. Stewart and Jason Parham and Jonathan Crall and Jason Holmberg and Jonathan Van Oast},
url = {https://kiciman.org/wp-content/uploads/2017/08/AnimalWildlifeEstimationUsingSocialMedia_SWEEM17.pdf},
year = {2017},
date = {2017-06-25},
booktitle = {Proceedings of the 2nd International Workshop on the Social Web for Environmental and Ecological Monitoring (SWEEM 2017)},
organization = {ACM},
abstract = {While the technologies of the Information Age have produced staggering amounts of data about people, they are by and large failing the world’s wildlife. Even the simplest and most critical piece of information, the number of animals of a species, is either unknown or is uncertain for most species. Here, we propose to use images of wildlife posted on social media platforms, together with animal recognition software and mark-recapture models, to estimate population sizes. We show that population size estimates from social media photographs of animals can produce robust results, yet more work is needed to understand biases inherent in the approach.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
While the technologies of the Information Age have produced staggering amounts of data about people, they are by and large failing the world’s wildlife. Even the simplest and most critical piece of information, the number of animals of a species, is either unknown or is uncertain for most species. Here, we propose to use images of wildlife posted on social media platforms, together with animal recognition software and mark-recapture models, to estimate population sizes. We show that population size estimates from social media photographs of animals can produce robust results, yet more work is needed to understand biases inherent in the approach. |
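The mark-recapture step can be illustrated with a textbook Chapman-corrected Lincoln-Petersen estimator (a sketch with hypothetical counts, not the Wildbook pipeline): treat two photo collections as capture occasions and count the individuals the identification software matches across both.

```python
# Hedged sketch: Chapman's bias-corrected Lincoln-Petersen population estimate,
# treating two photo sets as two "capture" occasions. Counts are hypothetical.
def lincoln_petersen(n_first: int, n_second: int, n_recaptured: int) -> float:
    """Estimate population size from two capture occasions and their overlap."""
    return (n_first + 1) * (n_second + 1) / (n_recaptured + 1) - 1

# Example: 220 identified animals in the first photo set, 180 in the second,
# 45 individuals matched across both.
estimate = lincoln_petersen(220, 180, 45)
print(f"Estimated population size: about {estimate:.0f}")  # ~869
```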
Munmun De Choudhury, Emre Kıcıman The Language of Social Support in Social Media and Its Effect on Suicidal Ideation Risk. Proceedings Article In: Proceedings of the International Conference on Web and Social Media (ICWSM), pp. 32–41, AAAI, 2017. @inproceedings{de2017language,
title = {The Language of Social Support in Social Media and Its Effect on Suicidal Ideation Risk.},
author = {Munmun De Choudhury and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/SocialSupport_ICWSM.pdf},
year = {2017},
date = {2017-05-15},
booktitle = {Proceedings of the International Conference on Web and Social Media (ICWSM)},
pages = {32--41},
publisher = {AAAI},
abstract = {Online social support is known to play a significant role in mental well-being. However, current research is limited in its ability to quantify this link. Challenges exist due to the paucity of longitudinal, pre- and post-mental illness risk data, and reliable methods that can examine causality between past availability of support and future risk. In this paper, we propose a method to measure how the language of comments in Reddit mental health communities influences risk to suicidal ideation in the future. Incorporating human assessments in a stratified propensity score analysis based framework, we identify comparable subpopulations of individuals and measure the effect of online social support language. We interpret these linguistic cues with an established theoretical model of social support, and find that esteem and network support play a more prominent role in reducing forthcoming risk. We discuss the implications of our work for designing tools that can improve support provisions in online communities.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Online social support is known to play a significant role in mental well-being. However, current research is limited in its ability to quantify this link. Challenges exist due to the paucity of longitudinal, pre- and post-mental illness risk data, and reliable methods that can examine causality between past availability of support and future risk. In this paper, we propose a method to measure how the language of comments in Reddit mental health communities influences risk to suicidal ideation in the future. Incorporating human assessments in a stratified propensity score analysis based framework, we identify comparable subpopulations of individuals and measure the effect of online social support language. We interpret these linguistic cues with an established theoretical model of social support, and find that esteem and network support play a more prominent role in reducing forthcoming risk. We discuss the implications of our work for designing tools that can improve support provisions in online communities. |
Alexandra Olteanu, Onur Varol, Emre Kıcıman Distilling the Outcomes of Personal Experiences: A Propensity-scored Analysis of Social Media. Proceedings Article In: Proceedings of Computer-Supported Cooperative Work and Social Computing, pp. 370–386, ACM, 2017. @inproceedings{olteanu2017distilling,
title = {Distilling the Outcomes of Personal Experiences: A Propensity-scored Analysis of Social Media.},
author = {Alexandra Olteanu and Onur Varol and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/cscw17-distilling-the-outcomes-of-personal-experiences-a-propensity-scored-analysis.pdf},
doi = {10.1145/2998181.2998353},
year = {2017},
date = {2017-02-25},
booktitle = {Proceedings of Computer-Supported Cooperative Work and Social Computing},
pages = {370--386},
publisher = {ACM},
abstract = {Millions of people regularly report the details of their real-world experiences on social media. This provides an opportunity to observe the outcomes of common and critical situations. Identifying and quantifying these outcomes may provide better decision-support and goal-achievement for individuals, and help policy-makers and scientists better understand important societal phenomena.
We address several open questions about using social media data for open-domain outcome identification: Are the words people are more likely to use after some experience relevant to this experience? How well do these words cover the breadth of outcomes likely to occur for an experience? What kinds of outcomes are discovered? Studying 3-months of Twitter data capturing people who experienced 39 distinct situations across a variety of domains, we find that these outcomes are generally found to be relevant (55-100% on average) and that causally related concepts are more likely to be discovered than conceptual or semantically related concepts.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Millions of people regularly report the details of their real-world experiences on social media. This provides an opportunity to observe the outcomes of common and critical situations. Identifying and quantifying these outcomes may provide better decision-support and goal-achievement for individuals, and help policy-makers and scientists better understand important societal phenomena.
We address several open questions about using social media data for open-domain outcome identification: Are the words people are more likely to use after some experience relevant to this experience? How well do these words cover the breadth of outcomes likely to occur for an experience? What kinds of outcomes are discovered? Studying 3-months of Twitter data capturing people who experienced 39 distinct situations across a variety of domains, we find that these outcomes are generally found to be relevant (55-100% on average) and that causally related concepts are more likely to be discovered than conceptual or semantically related concepts. |
2016
|
Alexandra Olteanu, Carlos Castillo, Fernando Diaz, Emre Kıcıman Social data: Biases, methodological pitfalls, and ethical boundaries Online SSRN 2016. @online{olteanu2016social,
title = {Social data: Biases, methodological pitfalls, and ethical boundaries},
author = {Alexandra Olteanu and Carlos Castillo and Fernando Diaz and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/SSRN-id2886526.pdf},
year = {2016},
date = {2016-12-20},
organization = {SSRN},
abstract = {Social data in digital form, which includes user-generated content, expressed or implicit relationships between people, and behavioral traces, are at the core of many popular applications and platforms, driving the research agenda of many researchers. The promises of social data are many, including understanding "what the world thinks" about a social issue, brand, product, celebrity, or other entity, as well as enabling better decision-making in a variety of fields including public policy, healthcare, and economics. Many academics and practitioners have warned against the naïve usage of social data. There are biases and inaccuracies occurring at the source of the data, but also introduced during processing. There are methodological limitations and pitfalls, as well as ethical boundaries and unexpected consequences that are often overlooked. This survey recognizes that the rigor with which these issues are addressed by different researchers varies across a wide range. We present a framework for identifying a broad variety of menaces in the research and practices around social data use.},
keywords = {},
pubstate = {published},
tppubtype = {online}
}
Social data in digital form, which includes user-generated content, expressed or implicit relationships between people, and behavioral traces, are at the core of many popular applications and platforms, driving the research agenda of many researchers. The promises of social data are many, including understanding "what the world thinks" about a social issue, brand, product, celebrity, or other entity, as well as enabling better decision-making in a variety of fields including public policy, healthcare, and economics. Many academics and practitioners have warned against the naïve usage of social data. There are biases and inaccuracies occurring at the source of the data, but also introduced during processing. There are methodological limitations and pitfalls, as well as ethical boundaries and unexpected consequences that are often overlooked. This survey recognizes that the rigor with which these issues are addressed by different researchers varies across a wide range. We present a framework for identifying a broad variety of menaces in the research and practices around social data use. |
Golnoosh Farnadi, Emre Kıcıman Idiomatic Application of Causal Analysis to Social Media Timelines: Opportunities and Challenges Proceedings Article In: Inference and Learning of Hypothetical and Counterfactual Interventions in Complex Systems, a workshop at NIPS, 2016. @inproceedings{farnadi2016idiomatic,
title = {Idiomatic Application of Causal Analysis to Social Media Timelines: Opportunities and Challenges},
author = {Golnoosh Farnadi and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/whatif_idiomaticapplicationsofcausalanalysis.pdf},
year = {2016},
date = {2016-12-10},
booktitle = {Inference and Learning of Hypothetical and Counterfactual Interventions in Complex Systems, a workshop at NIPS},
abstract = {Causal inference, from experimental and observational studies, is critical to answering important questions in natural, social and digital systems. Unfortunately, applying causal inference to large systems—such as markets, societies or even teams of people—presents critical challenges in causal inference due to network effects, feedback loops and other complications. While many causal methods have been introduced and are applicable to some of these problems, their use requires careful thought and adaptation by experts. But what if we could identify a (large) class of important questions that could be answered without repeated expert intervention? We identify such a broad class of simple questions about individual experiences—essentially, what happens after a person takes some action or has some experience—that can be answered through analysis of a large-scale corpus of individual-level social media timelines under ignorability and SUTVA assumptions. Our goal is to create a framework for data processing and causal inference methods that can best answer these action-outcome questions from social media timelines.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Causal inference, from experimental and observational studies, is critical to answering important questions in natural, social and digital systems. Unfortunately, applying causal inference to large systems—such as markets, societies or even teams of people—presents critical challenges in causal inference due to network effects, feedback loops and other complications. While many causal methods have been introduced and are applicable to some of these problems, their use requires careful thought and adaptation by experts. But what if we could identify a (large) class of important questions that could be answered without repeated expert intervention? We identify such a broad class of simple questions about individual experiences—essentially, what happens after a person takes some action or has some experience—that can be answered through analysis of a large-scale corpus of individual-level social media timelines under ignorability and SUTVA assumptions. Our goal is to create a framework for data processing and causal inference methods that can best answer these action-outcome questions from social media timelines. |
Abhimanyu Das, Sreenivas Gollapudi, Emre Kıcıman, Onur Varol Information dissemination in heterogeneous-intent networks Proceedings Article In: Proceedings of the 8th ACM Conference on Web Science, pp. 259–268, ACM, 2016. @inproceedings{das2016information,
title = {Information dissemination in heterogeneous-intent networks},
author = {Abhimanyu Das and Sreenivas Gollapudi and Emre Kıcıman and Onur Varol},
url = {https://kiciman.org/wp-content/uploads/2017/08/heterogeneous-intent_websci16-1.pdf},
doi = {10.1145/2908131.2908161},
year = {2016},
date = {2016-05-22},
booktitle = {Proceedings of the 8th ACM Conference on Web Science},
pages = {259--268},
publisher = {ACM},
organization = {ACM},
abstract = {Many qualitative studies of communication practices on social media have recognized that people's motivation for participating in social networks can vary greatly. Some people participate for fame and fortune, while others simply wish to chat with friends. In this paper, we study the implications of such heterogeneous intent for modeling information diffusion in social networks. We experiment with user-level perception of messages, analyze large-scale information cascades, and model information diffusion in heterogeneous-intent networks. We perform carefully designed user studies to establish the relationship between the intent and language style of a message sender. We find that users appear to adapt their language style to achieve different intents. We perform a large-scale data analysis on Twitter message cascades and confirm that message propagation through a network is correlated with historical representations of individuals' intents. Finally, we posit a simple analytical model of information diffusion in social networks that takes heterogeneous intents into account and find that this model is able to explain empirically observed properties of structural virality that are not explained by current models.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Many qualitative studies of communication practices on social media have recognized that people's motivation for participating in social networks can vary greatly. Some people participate for fame and fortune, while others simply wish to chat with friends. In this paper, we study the implications of such heterogeneous intent for modeling information diffusion in social networks. We experiment with user-level perception of messages, analyze large-scale information cascades, and model information diffusion in heterogeneous-intent networks. We perform carefully designed user studies to establish the relationship between the intent and language style of a message sender. We find that users appear to adapt their language style to achieve different intents. We perform a large-scale data analysis on Twitter message cascades and confirm that message propagation through a network is correlated with historical representations of individuals' intents. Finally, we posit a simple analytical model of information diffusion in social networks that takes heterogeneous intents into account and find that this model is able to explain empirically observed properties of structural virality that are not explained by current models. |
Munmun De Choudhury, Emre Kıcıman, Mark Dredze, Glen Coppersmith, Mrinal Kumar Discovering shifts to suicidal ideation from mental health content in social media Proceedings Article In: Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems, pp. 2098–2110, ACM, 2016, ISBN: 978-1-4503-3362-7. @inproceedings{de2016discovering,
title = {Discovering shifts to suicidal ideation from mental health content in social media},
author = {Munmun De Choudhury and Emre Kıcıman and Mark Dredze and Glen Coppersmith and Mrinal Kumar},
url = {https://kiciman.org/wp-content/uploads/2017/08/2016_chi_discoveringshifts.pdf},
doi = {10.1145/2858036.2858207},
isbn = {978-1-4503-3362-7},
year = {2016},
date = {2016-05-07},
booktitle = {Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems},
pages = {2098--2110},
publisher = {ACM},
organization = {ACM},
abstract = {History of mental illness is a major factor behind suicide risk and ideation. However, research efforts toward characterizing and forecasting this risk are limited due to the paucity of information regarding suicide ideation, exacerbated by the stigma of mental illness. This paper fills gaps in the literature by developing a statistical methodology to infer which individuals could undergo transitions from mental health discourse to suicidal ideation. We utilize semi-anonymous support communities on Reddit as unobtrusive data sources to infer the likelihood of these shifts. We develop language and interactional measures for this purpose, as well as a propensity score matching based statistical approach. Our approach allows us to derive distinct markers of shifts to suicidal ideation. These markers can be modeled in a prediction framework to identify individuals likely to engage in suicidal ideation in the future. We discuss societal and ethical implications of this research.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
History of mental illness is a major factor behind suicide risk and ideation. However, research efforts toward characterizing and forecasting this risk are limited due to the paucity of information regarding suicide ideation, exacerbated by the stigma of mental illness. This paper fills gaps in the literature by developing a statistical methodology to infer which individuals could undergo transitions from mental health discourse to suicidal ideation. We utilize semi-anonymous support communities on Reddit as unobtrusive data sources to infer the likelihood of these shifts. We develop language and interactional measures for this purpose, as well as a propensity score matching based statistical approach. Our approach allows us to derive distinct markers of shifts to suicidal ideation. These markers can be modeled in a prediction framework to identify individuals likely to engage in suicidal ideation in the future. We discuss societal and ethical implications of this research. |
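For readers wanting a concrete picture of the propensity score matching step mentioned in this abstract, here is a minimal sketch. It is not the paper's implementation: the feature matrix X (language and interactional measures), the binary treated vector (individuals who later shift to suicidal-ideation discourse), the caliper value, and the nearest-neighbor matching variant are all illustrative assumptions.

```python
# Minimal propensity-score-matching sketch (illustrative only, not the paper's pipeline).
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors

def match_controls(X, treated, caliper=0.05):
    """Pair each treated individual with the closest-propensity control individual."""
    # Estimate propensity scores: probability of being in the treated group given features.
    propensity = LogisticRegression(max_iter=1000).fit(X, treated).predict_proba(X)[:, 1]
    treated_idx = np.where(treated == 1)[0]
    control_idx = np.where(treated == 0)[0]
    # Nearest-neighbor match on the 1-D propensity score.
    nn = NearestNeighbors(n_neighbors=1).fit(propensity[control_idx].reshape(-1, 1))
    dist, pos = nn.kneighbors(propensity[treated_idx].reshape(-1, 1))
    # Keep only pairs whose propensity difference falls within the caliper.
    return [(t, control_idx[p[0]]) for t, d, p in zip(treated_idx, dist, pos) if d[0] <= caliper]
```

The matched (treated, control) pairs would then feed whatever downstream comparison of markers one wishes to run; the caliper simply discards poor matches.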
Alexandra Olteanu, Onur Varol, Emre Kıcıman Towards an Open-Domain Framework for Distilling the Outcomes of Personal Experiences from Social Media Timelines. Proceedings Article In: Proceedings of Intl. Conference on Web and Social Media (ICWSM), pp. 647–650, AAAI, 2016. @inproceedings{olteanu2016towards,
title = {Towards an Open-Domain Framework for Distilling the Outcomes of Personal Experiences from Social Media Timelines.},
author = {Alexandra Olteanu and Onur Varol and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/13113-57889-1-PB.pdf},
year = {2016},
date = {2016-03-31},
booktitle = {Proceedings of Intl. Conference on Web and Social Media (ICWSM)},
pages = {647--650},
publisher = {AAAI},
abstract = {Millions of people share details about their real-world experiences on social media. This provides an opportunity to observe the outcomes of common and critical situations and actions for individual and societal benefit. In this paper, we discuss our efforts to design and build an open-domain framework for mining the outcomes of any given experience from social media timelines. Through a number of example situations and actions across multiple domains, we discuss the kinds of outcomes we are able to extract and their relevance.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Millions of people share details about their real-world experiences on social media. This provides an opportunity to observe the outcomes of common and critical situations and actions for individual and societal benefit. In this paper, we discuss our efforts to design and build an open-domain framework for mining the outcomes of any given experience from social media timelines. Through a number of example situations and actions across multiple domains, we discuss the kinds of outcomes we are able to extract and their relevance. |
Munmun De Choudhury, Sanket Sharma, Emre Kıcıman Characterizing dietary choices, nutrition, and language in food deserts via social media Proceedings Article In: Proceedings of the 19th ACM Conference on Computer-Supported Cooperative Work & Social Computing, pp. 1157–1170, ACM 2016, ISBN: 978-1-4503-3592-8. @inproceedings{de2016characterizing,
title = {Characterizing dietary choices, nutrition, and language in food deserts via social media},
author = {Munmun De Choudhury and Sanket Sharma and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/cscw_fooddeserts.pdf},
doi = {10.1145/2818048.2819956},
isbn = {978-1-4503-3592-8},
year = {2016},
date = {2016-02-27},
booktitle = {Proceedings of the 19th ACM Conference on Computer-Supported Cooperative Work & Social Computing},
pages = {1157--1170},
organization = {ACM},
abstract = {Social media has emerged as a promising source of data for public health. This paper examines how these platforms can provide empirical quantitative evidence for understanding dietary choices and nutritional challenges in “food deserts” -- Census tracts characterized by poor access to healthy and affordable food. We present a study of 3 million food-related posts shared on Instagram, and observe that content from food deserts indicates consumption of food high in fat, cholesterol and sugar; a rate higher by 5-17% compared to non-food desert areas. Further, a topic model analysis reveals the ingestion language of food deserts to bear distinct attributes. Finally, we investigate to what extent Instagram ingestion language is able to infer whether a tract is a food desert. We find that a predictive model that uses ingestion topics, socio-economic and food deprivation status attributes yields high accuracy (>80%) and improves over baseline methods by 6-14%. We discuss the role of social media in helping address inequalities in food access and health.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Social media has emerged as a promising source of data for public health. This paper examines how these platforms can provide empirical quantitative evidence for understanding dietary choices and nutritional challenges in “food deserts” -- Census tracts characterized by poor access to healthy and affordable food. We present a study of 3 million food-related posts shared on Instagram, and observe that content from food deserts indicates consumption of food high in fat, cholesterol and sugar; a rate higher by 5-17% compared to non-food desert areas. Further, a topic model analysis reveals the ingestion language of food deserts to bear distinct attributes. Finally, we investigate to what extent Instagram ingestion language is able to infer whether a tract is a food desert. We find that a predictive model that uses ingestion topics, socio-economic and food deprivation status attributes yields high accuracy (>80%) and improves over baseline methods by 6-14%. We discuss the role of social media in helping address inequalities in food access and health. |
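A hedged sketch of the kind of tract-level classifier the abstract describes: per-tract ingestion-topic proportions combined with socio-economic attributes, evaluated with cross-validated accuracy. The function name, input layout, and choice of logistic regression are assumptions for illustration, not the paper's exact pipeline.

```python
# Illustrative tract-level food-desert classifier (assumed inputs, not the paper's code).
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def evaluate_food_desert_model(topic_props, socio_econ, is_food_desert):
    """topic_props and socio_econ are per-tract feature matrices; is_food_desert is 0/1."""
    X = np.hstack([topic_props, socio_econ])          # combine ingestion topics with tract attributes
    clf = LogisticRegression(max_iter=1000)
    return cross_val_score(clf, X, is_food_desert, cv=5, scoring="accuracy").mean()
```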
Fernando Diaz, Michael Gamon, Jake M Hofman, Emre Kıcıman, David Rothschild Online and social media data as an imperfect continuous panel survey Journal Article In: PLoS ONE, vol. 11, no. 1, pp. e0145406, 2016. @article{diaz2016online,
title = {Online and social media data as an imperfect continuous panel survey},
author = {Fernando Diaz and Michael Gamon and Jake M Hofman and Emre Kıcıman and David Rothschild},
url = {http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0145406
https://kiciman.org/wp-content/uploads/2017/08/plosone_onlineandsocialmediadata.pdf},
doi = {10.1371/journal.pone.0145406},
year = {2016},
date = {2016-01-05},
journal = {PLoS ONE},
volume = {11},
number = {1},
pages = {e0145406},
publisher = {Public Library of Science},
abstract = {There is a large body of research on utilizing online activity as a survey of political opinion to predict real world election outcomes. There is considerably less work, however, on using this data to understand topic-specific interest and opinion amongst the general population and specific demographic subgroups, as currently measured by relatively expensive surveys. Here we investigate this possibility by studying a full census of all Twitter activity during the 2012 election cycle along with the comprehensive search history of a large panel of Internet users during the same period, highlighting the challenges in interpreting online and social media activity as the results of a survey. As noted in existing work, the online population is a non-representative sample of the offline world (e.g., the U.S. voting population). We extend this work to show how demographic skew and user participation is non-stationary and difficult to predict over time. In addition, the nature of user contributions varies substantially around important events. Furthermore, we note subtle problems in mapping what people are sharing or consuming online to specific sentiment or opinion measures around a particular topic. We provide a framework, built around considering this data as an imperfect continuous panel survey, for addressing these issues so that meaningful insight about public interest and opinion can be reliably extracted from online and social media data.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
There is a large body of research on utilizing online activity as a survey of political opinion to predict real world election outcomes. There is considerably less work, however, on using this data to understand topic-specific interest and opinion amongst the general population and specific demographic subgroups, as currently measured by relatively expensive surveys. Here we investigate this possibility by studying a full census of all Twitter activity during the 2012 election cycle along with the comprehensive search history of a large panel of Internet users during the same period, highlighting the challenges in interpreting online and social media activity as the results of a survey. As noted in existing work, the online population is a non-representative sample of the offline world (e.g., the U.S. voting population). We extend this work to show how demographic skew and user participation is non-stationary and difficult to predict over time. In addition, the nature of user contributions varies substantially around important events. Furthermore, we note subtle problems in mapping what people are sharing or consuming online to specific sentiment or opinion measures around a particular topic. We provide a framework, built around considering this data as an imperfect continuous panel survey, for addressing these issues so that meaningful insight about public interest and opinion can be reliably extracted from online and social media data. |
2015
|
Benjamin S Livshits, Emre Mehmet Kiciman, David Simmons Edge computing platform for delivery of rich internet applications Miscellaneous 2015, (US Patent 9,152,411). @misc{livshits2015edge,
title = {Edge computing platform for delivery of rich internet applications},
author = {Benjamin S Livshits and Emre Mehmet Kiciman and David Simmons},
year = {2015},
date = {2015-10-01},
note = {US Patent 9,152,411},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Paul N Bennett, Emre M Kiciman, Alexander Fishkov Searching Based on the Persona of Another Miscellaneous 2015, (US Patent App. 14/884,789). @misc{bennett2015searching,
title = {Searching Based on the Persona of Another},
author = {Paul N Bennett and Emre M Kiciman and Alexander Fishkov},
year = {2015},
date = {2015-10-01},
note = {US Patent App. 14/884,789},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Kıcıman, Matthew Richardson Towards Decision Support and Goal Achievement: Identifying Action-Outcome Relationships From Social Media Proceedings Article In: Proceedings of the 21st ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 547–556, ACM ACM, 2015, ISBN: 978-1-4503-3664-2. @inproceedings{kiciman2015towardsb,
title = {Towards Decision Support and Goal Achievement: Identifying Action-Outcome Relationships From Social Media},
author = {Emre Kıcıman and Matthew Richardson},
url = {https://kiciman.org/wp-content/uploads/2017/08/kdd15_actionoutcome_cr.pdf},
doi = {10.1145/2783258.2783310},
isbn = {978-1-4503-3664-2},
year = {2015},
date = {2015-08-10},
booktitle = {Proceedings of the 21st ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
pages = {547--556},
publisher = {ACM},
organization = {ACM},
abstract = {Every day, people take actions, trying to achieve their personal, high-order goals. People decide what actions to take based on their personal experience, knowledge and gut instinct. While this leads to positive outcomes for some people, many others do not have the necessary experience, knowledge and instinct to make good decisions. What if, rather than making decisions based solely on their own personal experience, people could take advantage of the reported experiences of hundreds of millions of other people?
In this paper, we investigate the feasibility of mining the relationship between actions and their outcomes from the aggregated timelines of individuals posting experiential microblog reports. Our contributions include an architecture for extracting action-outcome relationships from social media data, techniques for identifying experiential social media messages and converting them to event timelines, and an analysis and evaluation of action-outcome extraction in case studies.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Every day, people take actions, trying to achieve their personal, high-order goals. People decide what actions to take based on their personal experience, knowledge and gut instinct. While this leads to positive outcomes for some people, many others do not have the necessary experience, knowledge and instinct to make good decisions. What if, rather than making decisions based solely on their own personal experience, people could take advantage of the reported experiences of hundreds of millions of other people?
In this paper, we investigate the feasibility of mining the relationship between actions and their outcomes from the aggregated timelines of individuals posting experiential microblog reports. Our contributions include an architecture for extracting action-outcome relationships from social media data, techniques for identifying experiential social media messages and converting them to event timelines, and an analysis and evaluation of action-outcome extraction in case studies. |
Paul N Bennett, Alexander Fishkov, Emre Kıcıman Persona-ization: Searching on Behalf of Others. Proceedings Article In: Proceedings of Intl Workshop on Social Personalisation and Search (SPS 2015) at SIGIR, pp. 26–32, ACM, 2015. @inproceedings{bennett2015persona,
title = {Persona-ization: Searching on Behalf of Others.},
author = {Paul N Bennett and Alexander Fishkov and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/persona-ization-sps-2015-final.pdf},
year = {2015},
date = {2015-08-09},
booktitle = {Proceedings of Intl Workshop on Social Personalisation and Search (SPS 2015) at SIGIR},
pages = {26--32},
publisher = {ACM},
abstract = {Many information retrieval tasks involve searching on behalf of others. Example scenarios include searching for a present to give a friend, trying to find “cool” clothes for a teenage child, looking for medical supplies for an elderly relative, or planning a group activity that many friends will enjoy. In this paper, we use demographically annotated web search logs to present a large-scale study of such “on behalf of” searches. We develop an exploratory technique for recognizing such searches, and present information to describe and understand the phenomenon, including the demographics of who is searching, who they are searching for and on what topics.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Many information retrieval tasks involve searching on behalf of others. Example scenarios include searching for a present to give a friend, trying to find “cool” clothes for a teenage child, looking for medical supplies for an elderly relative, or planning a group activity that many friends will enjoy. In this paper, we use demographically annotated web search logs to present a large-scale study of such “on behalf of” searches. We develop an exploratory technique for recognizing such searches, and present information to describe and understand the phenomenon, including the demographics of who is searching, who they are searching for and on what topics. |
Emre Kiciman, Abulimiti Aji, Kuansan Wang Entity recognition using probabilities for out-of-collection data Miscellaneous 2015, (US Patent 9,104,979). @misc{kiciman2015entity,
title = {Entity recognition using probabilities for out-of-collection data},
author = {Emre Kiciman and Abulimiti Aji and Kuansan Wang},
year = {2015},
date = {2015-08-01},
note = {US Patent 9,104,979},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Andrés Monroy-Hernández, Emre Kıcıman, Danah Boyd, Scott Counts Narcotweets: Social media in wartime Online arXiv 2015. @online{monroy2015narcotweets,
title = {Narcotweets: Social media in wartime},
author = {Andrés Monroy-Hernández and Emre Kıcıman and Danah Boyd and Scott Counts},
url = {https://kiciman.org/wp-content/uploads/2017/08/1507.01290.pdf},
year = {2015},
date = {2015-07-05},
journal = {arXiv preprint arXiv:1507.01290},
organization = {arXiv},
abstract = {This paper describes how people living in armed conflict environments use social media as a participatory news platform, in lieu of damaged state and media apparatuses. We investigate this by analyzing the microblogging practices of Mexican citizens whose everyday life is affected by the Drug War. We provide a descriptive analysis of the phenomenon, combining content and quantitative Twitter data analyses. We focus on three interrelated phenomena: general participation patterns of ordinary citizens, the emergence and role of information curators, and the tension between governmental regulation and drug cartel intimidation. This study reveals the complex tensions among citizens, media actors, and the government in light of large scale organized crime.},
keywords = {},
pubstate = {published},
tppubtype = {online}
}
This paper describes how people living in armed conflict environments use social media as a participatory news platform, in lieu of damaged state and media apparatuses. We investigate this by analyzing the microblogging practices of Mexican citizens whose everyday life is affected by the Drug War. We provide a descriptive analysis of the phenomenon, combining content and quantitative Twitter data analyses. We focus on three interrelated phenomena: general participation patterns of ordinary citizens, the emergence and role of information curators, and the tension between governmental regulation and drug cartel intimidation. This study reveals the complex tensions among citizens, media actors, and the government in light of large scale organized crime. |
Emre Kıcıman Towards learning a knowledge base of actions from experiential microblogs Proceedings Article In: AAAI Spring Symposium on Knowledge Representation and Reasoning, AAAI, 2015. @inproceedings{kiciman2015towards,
title = {Towards learning a knowledge base of actions from experiential microblogs},
author = {Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/10318-45225-1-PB.pdf},
year = {2015},
date = {2015-03-11},
booktitle = {AAAI Spring Symposium on Knowledge Representation and Reasoning},
publisher = {AAAI},
abstract = {While today's structured knowledge bases (e.g., Freebase) contain a sizable collection of information about entities, from celebrities and locations to concepts and common objects, there is a class of knowledge that has minimal coverage: actions. A large-scale knowledge base of actions would provide an opportunity for computing devices to aid and support people's reasoning about their own actions and outcomes, leading to improved decision-making and goal achievement. In this short paper, we describe our first efforts towards building a distributional representation of actions and their outcomes, as learned from the timelines of individuals posting experiential microblogs.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
While today's structured knowledge bases (e.g., Freebase) contain a sizable collection of information about entities, from celebrities and locations to concepts and common objects, there is a class of knowledge that has minimal coverage: actions. A large-scale knowledge base of actions would provide an opportunity for computing devices to aid and support people's reasoning about their own actions and outcomes, leading to improved decision-making and goal achievement. In this short paper, we describe our first efforts towards building a distributional representation of actions and their outcomes, as learned from the timelines of individuals posting experiential microblogs. |
Paul Nathan Bennett, Emre Mehmet Kiciman, Peter Richard Bailey, Nikhil Dandekar, Huizhong Duan Ranking based on social activity data Miscellaneous 2015, (US Patent 8,972,399). @misc{bennett2015ranking,
title = {Ranking based on social activity data},
author = {Paul Nathan Bennett and Emre Mehmet Kiciman and Peter Richard Bailey and Nikhil Dandekar and Huizhong Duan},
year = {2015},
date = {2015-03-01},
note = {US Patent 8,972,399},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Mehmet Kiciman, Chun-Kai Wang, Yi-Min Wang Generating activities based upon social data Miscellaneous 2015, (US Patent App. 14/662,868). @misc{kiciman2015generatingb,
title = {Generating activities based upon social data},
author = {Emre Mehmet Kiciman and Chun-Kai Wang and Yi-Min Wang},
year = {2015},
date = {2015-03-01},
note = {US Patent App. 14/662,868},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Mehmet Kiciman, Chun-Kai Wang, Yi-Min Wang Generating activities based upon social data Miscellaneous 2015, (US Patent 9,009,226). @misc{kiciman2015generating,
title = {Generating activities based upon social data},
author = {Emre Mehmet Kiciman and Chun-Kai Wang and Yi-Min Wang},
year = {2015},
date = {2015-00-01},
note = {US Patent 9,009,226},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2014
|
Emre Kıcıman, Scott Counts, Michael Gamon, Munmun De Choudhury, Bo Thiesson Discussion Graphs: Putting Social Media Analysis in Context Proceedings Article In: Eighth International AAAI Conference on Weblogs and Social Media (ICWSM), AAAI, 2014. @inproceedings{kiciman2014discussion,
title = {Discussion Graphs: Putting Social Media Analysis in Context},
author = {Emre Kıcıman and Scott Counts and Michael Gamon and Munmun De Choudhury and Bo Thiesson},
url = {https://kiciman.org/wp-content/uploads/2017/08/discussion-graphs-8107-37752-1-PB.pdf},
year = {2014},
date = {2014-05-16},
booktitle = {Eighth International AAAI Conference on Weblogs and Social Media (ICWSM)},
publisher = {AAAI},
abstract = {Much research has focused on studying complex phenomena through their reflection in social media, from drawing neighborhood boundaries to inferring relationships between medicines and diseases. While it is generally recognized in the social sciences that such studies should be conditioned on gender, time and other confounding factors, few of the studies that attempt to extract information from social media actually condition on such factors due to the difficulty in extracting these factors from naturalistic data and the added complexity of including them in analyses. In this paper, we present a simple framework for specifying and implementing common social media analyses that makes it trivial to inspect and condition on contextual information. Our data model -- discussion graphs -- captures both the structural features of relationships inferred from social media as well as the context of the discussions from which they are derived, such as who is participating in the discussions, when and where the discussions are occurring, and what else is being discussed in conjunction. We implement our framework in a tool called DGT, and present case studies on its use. In particular, we show how analyses of neighborhoods and their boundaries based on geo-located social media data can have drastically varying results when conditioned on gender and time.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Much research has focused on studying complex phenomena through their reflection in social media, from drawing neighborhood boundaries to inferring relationships between medicines and diseases. While it is generally recognized in the social sciences that such studies should be conditioned on gender, time and other confounding factors, few of the studies that attempt to extract information from social media actually condition on such factors due to the difficulty in extracting these factors from naturalistic data and the added complexity of including them in analyses. In this paper, we present a simple framework for specifying and implementing common social media analyses that makes it trivial to inspect and condition on contextual information. Our data model -- discussion graphs -- captures both the structural features of relationships inferred from social media as well as the context of the discussions from which they are derived, such as who is participating in the discussions, when and where the discussions are occurring, and what else is being discussed in conjunction. We implement our framework in a tool called DGT, and present case studies on its use. In particular, we show how analyses of neighborhoods and their boundaries based on geo-located social media data can have drastically varying results when conditioned on gender and time. |
Fernando Diaz, Michael Gamon, Jake Hofman, Emre Kıcıman, David Rothschild Online and social media data as a flawed continuous panel survey Technical Report Microsoft Research no. MSR-TR-2014-159, 2014. @techreport{diaz2014online,
title = {Online and social media data as a flawed continuous panel survey},
author = {Fernando Diaz and Michael Gamon and Jake Hofman and Emre Kıcıman and David Rothschild},
url = {https://kiciman.org/wp-content/uploads/2017/08/flawedpanel_tr.pdf},
year = {2014},
date = {2014-05-15},
number = {MSR-TR-2014-159},
institution = {Microsoft Research},
abstract = {There is a large body of research on utilizing online activity to predict various real world outcomes, ranging from outbreaks of influenza to outcomes of elections. There is considerably less work, however, on using this data to understand topic-specific interest and opinion amongst the general population and specific demographic subgroups, as currently measured by relatively expensive surveys. Here we investigate this possibility by studying a full census of all Twitter activity during the 2012 election cycle along with comprehensive search history of a large panel of internet users during the same period, highlighting the challenges in interpreting online and social media activity as the results of a survey. As noted in existing work, the online population is a non-representative sample of the offline world (e.g., the U.S. voting population). We extend this work to show how demographic skew and user participation is non-stationary and unpredictable over time. In addition, the nature of user contributions varies wildly around important events. Finally, we note subtle problems in mapping what people are sharing or consuming online to specific sentiment or opinion measures around a particular topic. These issues must be addressed before meaningful insight about public interest and opinion can be reliably extracted from online and social media data.},
keywords = {},
pubstate = {published},
tppubtype = {techreport}
}
There is a large body of research on utilizing online activity to predict various real world outcomes, ranging from outbreaks of influenza to outcomes of elections. There is considerably less work, however, on using this data to understand topic-specific interest and opinion amongst the general population and specific demographic subgroups, as currently measured by relatively expensive surveys. Here we investigate this possibility by studying a full census of all Twitter activity during the 2012 election cycle along with comprehensive search history of a large panel of internet users during the same period, highlighting the challenges in interpreting online and social media activity as the results of a survey. As noted in existing work, the online population is a non-representative sample of the offline world (e.g., the U.S. voting population). We extend this work to show how demographic skew and user participation is non-stationary and unpredictable over time. In addition, the nature of user contributions varies wildly around important events. Finally, we note subtle problems in mapping what people are sharing or consuming online to specific sentiment or opinion measures around a particular topic. These issues must be addressed before meaningful insight about public interest and opinion can be reliably extracted from online and social media data. |
Abhimanyu Das, Sreenivas Gollapudi, Emre Kıcıman Effect of Persuasion on Information Diffusion in Social Networks Technical Report no. MSR-TR-2014-69, 2014. @techreport{das2014effect,
title = {Effect of Persuasion on Information Diffusion in Social Networks},
author = {Abhimanyu Das and Sreenivas Gollapudi and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/persuasion_2014-05-19.pdf},
year = {2014},
date = {2014-05-01},
number = {MSR-TR-2014-69},
abstract = {One of the key factors guiding the act of communication between individuals in a social network is the desire to persuade or influence one another. In this paper, we study the interplay between a person writing (selecting) a message to send to another and the effect that the message has on its recipient. Using large-scale online user studies, we focus on a single effect (persuading or changing a recipient’s opinion about a topic) and its relationship to various measurable properties of the written message often associated with persuasive techniques, namely the degree of emotional and logical content. We find that the persuasive efficacy of these properties varies by domain of discussion and by individual susceptibility, and that senders appear to strategically select their persuasion techniques. Based on these results, we develop a structural model of information diffusion and show through simulations that the emergent larger-scale behaviors are consistent with current models of information cascades and, moreover, are able to model as yet unexplained empirically observed variance in the structural virality of cascades.},
keywords = {},
pubstate = {published},
tppubtype = {techreport}
}
One of the key factors guiding the act of communication between individuals in a social network is the desire to persuade or influence one another. In this paper, we study the interplay between a person writing (selecting) a message to send to another and the effect that the message has on its recipient. Using large-scale online user studies, we focus on a single effect (persuading or changing a recipient’s opinion about a topic) and its relationship to various measurable properties of the written message often associated with persuasive techniques, namely the degree of emotional and logical content. We find that the persuasive efficacy of these properties varies by domain of discussion and by individual susceptibility, and that senders appear to strategically select their persuasion techniques. Based on these results, we develop a structural model of information diffusion and show through simulations that the emergent larger-scale behaviors are consistent with current models of information cascades and, moreover, are able to model as yet unexplained empirically observed variance in the structural virality of cascades. |
George Danezis, Tuomas Aura, Shuo Chen, Emre Mehmet Kiciman Sharing of user preferences Miscellaneous 2014, (US Patent 8,700,705). @misc{danezis2014sharing,
title = {Sharing of user preferences},
author = {George Danezis and Tuomas Aura and Shuo Chen and Emre Mehmet Kiciman},
year = {2014},
date = {2014-04-01},
note = {US Patent 8,700,705},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Yuzhe Jin, Emre Kıcıman, Kuansan Wang, Ricky Loynd Entity linking at the tail: sparse signals, unknown entities, and phrase models Proceedings Article In: Proceedings of the 7th ACM international conference on Web search and data mining (WSDM), pp. 453–462, ACM, 2014, ISBN: 978-1-4503-2351-2. @inproceedings{jin2014entity,
title = {Entity linking at the tail: sparse signals, unknown entities, and phrase models},
author = {Yuzhe Jin and Emre Kıcıman and Kuansan Wang and Ricky Loynd},
url = {https://kiciman.org/wp-content/uploads/2017/08/wsdm180-jin.pdf},
doi = {10.1145/2556195.2556230},
isbn = {978-1-4503-2351-2},
year = {2014},
date = {2014-02-24},
booktitle = {Proceedings of the 7th ACM international conference on Web search and data mining (WSDM)},
pages = {453--462},
publisher = {ACM},
abstract = {Web search is seeing a paradigm shift from keyword based search to an entity-centric organization of web data. To support web search with this deeper level of understanding, a web-scale entity linking system must have 3 key properties: First, its feature extraction must be robust to the diversity of web documents and their varied writing styles and content structures. Second, it must maintain high-precision linking for "tail" (unpopular) entities that is robust to the existence of confounding entities outside of the knowledge base and entity profiles with minimal information. Finally, the system must represent large-scale knowledge bases with a scalable and powerful feature representation. We have built and deployed a web-scale unsupervised entity linking system for a commercial search engine that addresses these requirements by combining new developments in sparse signal recovery to identify the most discriminative features from noisy, free-text web documents; explicit modeling of out-of-knowledge-base entities to improve precision at the tail; and the development of a new phrase-unigram language model to efficiently capture high-order dependencies in lexical features. Using a knowledge base of 100M unique people from a popular social networking site, we present experimental results in the challenging domain of people-linking at the tail, where most entities have limited web presence. Our experimental results show that this system substantially improves on the precision-recall tradeoff over baseline methods, achieving precision over 95% with recall over 60%.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Web search is seeing a paradigm shift from keyword based search to an entity-centric organization of web data. To support web search with this deeper level of understanding, a web-scale entity linking system must have 3 key properties: First, its feature extraction must be robust to the diversity of web documents and their varied writing styles and content structures. Second, it must maintain high-precision linking for "tail" (unpopular) entities that is robust to the existence of confounding entities outside of the knowledge base and entity profiles with minimal information. Finally, the system must represent large-scale knowledge bases with a scalable and powerful feature representation. We have built and deployed a web-scale unsupervised entity linking system for a commercial search engine that addresses these requirements by combining new developments in sparse signal recovery to identify the most discriminative features from noisy, free-text web documents; explicit modeling of out-of-knowledge-base entities to improve precision at the tail; and the development of a new phrase-unigram language model to efficiently capture high-order dependencies in lexical features. Using a knowledge base of 100M unique people from a popular social networking site, we present experimental results in the challenging domain of people-linking at the tail, where most entities have limited web presence. Our experimental results show that this system substantially improves on the precision-recall tradeoff over baseline methods, achieving precision over 95% with recall over 60%. |
John D Dunagan, Albert Greenberg, Emre M Kiciman, Heather E Warncke, Alastair Wolman Scaled management system Miscellaneous 2014, (US Patent 8,666,967). @misc{dunagan2014scaled,
title = {Scaled management system},
author = {John D Dunagan and Albert Greenberg and Emre M Kiciman and Heather E Warncke and Alastair Wolman},
year = {2014},
date = {2014-01-01},
note = {US Patent 8,666,967},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Benjamin G Zorn, Jeffrey T Cohen, Dennis B Gannon, Emre M Kiciman, George M Moore, Stuart H Schaefer Datacenter execution templates Miscellaneous 2014, (US Patent 8,856,333). @misc{zorn2014datacenter,
title = {Datacenter execution templates},
author = {Benjamin G Zorn and Jeffrey T Cohen and Dennis B Gannon and Emre M Kiciman and George M Moore and Stuart H Schaefer},
year = {2014},
date = {2014-01-01},
note = {US Patent 8,856,333},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2013
|
Robert L Rounthwaite, Galen Andrew, Emre Mehmet Kiciman, Xiaoxin Yin Determining a similarity measure between queries Miscellaneous 2013, (US Patent 8,606,786). @misc{rounthwaite2013determining,
title = {Determining a similarity measure between queries},
author = {Robert L Rounthwaite and Galen Andrew and Emre Mehmet Kiciman and Xiaoxin Yin},
year = {2013},
date = {2013-12-01},
note = {US Patent 8,606,786},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Andrés Monroy-Hernández, Shelly Farnham, Emre Kıcıman, Scott Counts, Munmun De Choudhury Smart societies: from citizens as sensors to collective action Journal Article In: ACM Interactions, vol. 20, no. 4, pp. 16–19, 2013. @article{monroy2013smart,
title = {Smart societies: from citizens as sensors to collective action},
author = {Andrés Monroy-Hernández and Shelly Farnham and Emre Kıcıman and Scott Counts and Munmun De Choudhury},
url = {https://kiciman.org/wp-content/uploads/2017/08/p16-monroy-hernandez.pdf},
doi = {10.1145/2486227.2486249},
year = {2013},
date = {2013-07-01},
journal = {ACM Interactions},
volume = {20},
number = {4},
pages = {16--19},
publisher = {ACM},
abstract = {Social media has become globally ubiquitous, transforming how people are networked and mobilized. This forum explores research and applications of these new networked publics at individual, organizational, and societal levels.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Social media has become globally ubiquitous, transforming how people are networked and mobilized. This forum explores research and applications of these new networked publics at individual, organizational, and societal levels. |
Stephen Guo, Ming-Wei Chang, Emre Kıcıman To Link or Not to Link? A Study on End-to-End Tweet Entity Linking. Proceedings Article In: Proceedings of NAACL-HLT 2013, pp. 1020–1030, 2013. @inproceedings{guo2013link,
title = {To Link or Not to Link? A Study on End-to-End Tweet Entity Linking.},
author = {Stephen Guo and Ming-Wei Chang and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/GuoChKi13.pdf},
year = {2013},
date = {2013-06-10},
booktitle = {Proceedings of NAACL-HLT 2013},
pages = {1020--1030},
abstract = {Information extraction on microblog posts is an important task nowadays, as microblogs capture an unprecedented amount of information and provide a view into the pulse of the world. Given that the current definition of named entity recognition is too limited, we consider the task of Twitter entity linking in this paper.
In the current entity linking literature, mention detection and entity disambiguation are frequently cast as equally important but distinct problems. However, in our task, we find that mention detection is often the performance bottleneck. The reason is that messages on micro-blogs are short, noisy, and informal texts with little context, and often contain phrases with ambiguous meanings.
To rigorously address the Twitter entity linking problem, we propose a structural SVM algorithm for entity linking that jointly optimizes mention detection and entity disambiguation as a single end-to-end task. By combining structural learning and a variety of first-order, second-order, and context-sensitive features, our system is able to outperform existing state-of-the-art entity linking systems by 15% F1.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Information extraction on microblog posts is an important task nowadays, as microblogs capture an unprecedented amount of information and provide a view into the pulse of the world. Given that the current definition of named entity recognition is too limited, we consider the task of Twitter entity linking in this paper.
In the current entity linking literature, mention detection and entity disambiguation are frequently cast as equally important but distinct problems. However, in our task, we find that mention detection is often the performance bottleneck. The reason is that messages on micro-blogs are short, noisy, and informal texts with little context, and often contain phrases with ambiguous meanings.
To rigorously address the Twitter entity linking problem, we propose a structural SVM algorithm for entity linking that jointly optimizes mention detection and entity disambiguation as a single end-to-end task. By combining structural learning and a variety of first-order, second-order, and context-sensitive features, our system is able to outperform existing state-of-the-art entity linking systems by 15% F1. |
Yuzhe Jin, Kuansan Wang, Emre Kıcıman Sparse lexical representation for semantic entity resolution Proceedings Article In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 8575–8579, IEEE 2013. @inproceedings{jin2013sparse,
title = {Sparse lexical representation for semantic entity resolution},
author = {Yuzhe Jin and Kuansan Wang and Emre Kıcıman},
year = {2013},
date = {2013-05-26},
booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {8575--8579},
organization = {IEEE},
abstract = {This paper addresses the problem of semantic entity resolution (SER), which aims to determine whether some or none of the entities in a knowledge base is mentioned in a given web document. The lexical features, e.g., words and phrases, which are critical to the resolution of the semantic entities typically make up only a small fraction of all lexical features in the web document, and therefore can be modeled as sparse signals. Two techniques leveraging the principles of sparse signal recovery are proposed to identify the sparse, salient lexical features: one technique, based on the Lasso algorithm with the l2-norm distance metric, attempts to recover all the salient lexical features at once; the other technique, namely Posterior Probability Pursuit (PPP), sequentially identifies salient features one after another using the negative log posterior probability as the distance metric. Using a knowledge base consisting of about 100 million entities, we show that the proposed techniques exploiting the sparsity underlying SER deliver substantial performance improvement over baseline methods without sparsity consideration, demonstrating the potential of sparse signal techniques in entity-centric web information processing.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
This paper addresses the problem of semantic entity resolution (SER), which aims to determine whether some or none of the entities in a knowledge base is mentioned in a given web document. The lexical features, e.g., words and phrases, which are critical to the resolution of the semantic entities typically make up only a small fraction of all lexical features in the web document, and therefore can be modeled as sparse signals. Two techniques leveraging the principles of sparse signal recovery are proposed to identify the sparse, salient lexical features: one technique, based on the Lasso algorithm with the l2-norm distance metric, attempts to recover all the salient lexical features at once; the other technique, namely Posterior Probability Pursuit (PPP), sequentially identifies salient features one after another using the negative log posterior probability as the distance metric. Using a knowledge base consisting of about 100 million entities, we show that the proposed techniques exploiting the sparsity underlying SER deliver substantial performance improvement over baseline methods without sparsity consideration, demonstrating the potential of sparse signal techniques in entity-centric web information processing. |
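To make the Lasso-based sparse recovery idea concrete, the sketch below treats a document's term vector as a sparse combination of entity lexical profiles and returns the best-supported entity, or none. The matrix D, the alpha and threshold values, and the use of scikit-learn's Lasso are assumptions for illustration, not the deployed system described in the paper.

```python
# Hedged sketch of sparse entity resolution via Lasso (illustrative, not the paper's system).
import numpy as np
from sklearn.linear_model import Lasso

def resolve_entity(doc_vec, D, alpha=0.01, threshold=0.1):
    """doc_vec: term-frequency vector of the document (length n_terms).
    D: (n_terms, n_entities) matrix whose columns are entity lexical profiles."""
    lasso = Lasso(alpha=alpha, positive=True, max_iter=10000)
    lasso.fit(D, doc_vec)                 # recover a sparse mixture of entity profiles
    weights = lasso.coef_                 # one weight per candidate entity
    best = int(np.argmax(weights))
    # "None of the above": no entity's profile gets enough weight to explain the document.
    return best if weights[best] >= threshold else None
```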
Chun-Kai Wang, B Hsu, Ming-Wei Chang, Emre Kıcıman Simple and knowledge-intensive generative model for named entity recognition Technical Report Microsoft Research no. MSR-TR-2013-3, 2013. @techreport{wang2013simple,
title = {Simple and knowledge-intensive generative model for named entity recognition},
author = {Chun-Kai Wang and B Hsu and Ming-Wei Chang and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/nlmm-msr-tr-2013-3.pdf},
year = {2013},
date = {2013-03-01},
journal = {Microsoft Research},
number = {MSR-TR-2013-3},
institution = {Microsoft Research},
abstract = {Almost all of the existing work on Named Entity Recognition (NER) consists of the following pipeline stages – part-of-speech tagging, segmentation, and named entity type classification. The requirement of hand-labeled training data on these stages makes it very expensive to extend to different domains and entity classes. Even with a large amount of hand-labeled data, existing techniques for NER on informal text, such as social media, perform poorly due to a lack of reliable capitalization, irregular sentence structure and a wide range of vocabulary.
In this paper, we address the lack of hand-labeled training data by taking advantage of weak supervision signals. We present our approach in two parts. First, we propose a novel generative model that combines the ideas from Hidden Markov Model (HMM) and n-gram language models into what we call an N-gram Language Markov Model (NLMM). Second, we utilize large-scale weak supervision signals from sources such as Wikipedia titles and the corresponding click counts to estimate parameters in NLMM. Our model is simple and can be implemented without the use of Expectation Maximization or other expensive iterative training techniques. Even with this simple model, our approach to NER on informal text outperforms existing systems trained on formal English and matches state-of-the-art NER systems trained on hand-labeled Twitter messages. Because our model does not require hand-labeled data, we can adapt our system to other domains and named entity classes very easily. We demonstrate the flexibility of our approach by successfully applying it to the different domain of extracting food dishes from restaurant reviews with very little extra work.},
keywords = {},
pubstate = {published},
tppubtype = {techreport}
}
Almost all of the existing work on Named Entity Recognition (NER) consists of the following pipeline stages – part-of-speech tagging, segmentation, and named entity type classification. The requirement of hand-labeled training data on these stages makes it very expensive to extend to different domains and entity classes. Even with a large amount of hand-labeled data, existing techniques for NER on informal text, such as social media, perform poorly due to a lack of reliable capitalization, irregular sentence structure and a wide range of vocabulary.
In this paper, we address the lack of hand-labeled training data by taking advantage of weak supervision signals. We present our approach in two parts. First, we propose a novel generative model that combines the ideas from Hidden Markov Model (HMM) and n-gram language models into what we call an N-gram Language Markov Model (NLMM). Second, we utilize large-scale weak supervision signals from sources such as Wikipedia titles and the corresponding click counts to estimate parameters in NLMM. Our model is simple and can be implemented without the use of Expectation Maximization or other expensive iterative training techniques. Even with this simple model, our approach to NER on informal text outperforms existing systems trained on formal English and matches state-of-the-art NER systems trained on hand-labeled Twitter messages. Because our model does not require hand-labeled data, we can adapt our system to other domains and named entity classes very easily. We demonstrate the flexibility of our approach by successfully applying it to the different domain of extracting food dishes from restaurant reviews with very little extra work. |
Andrés Monroy-Hernández, Emre Kıcıman, Munmun De Choudhury, Scott Counts, others The new war correspondents: The rise of civic media curation in urban warfare Proceedings Article In: Proceedings of the 2013 conference on Computer Supported Cooperative Work (CSCW), pp. 1443–1452, ACM ACM, 2013. @inproceedings{monroy2013new,
title = {The new war correspondents: The rise of civic media curation in urban warfare},
author = {Andrés Monroy-Hernández and Emre Kıcıman and Munmun De Choudhury and Scott Counts and others},
url = {https://kiciman.org/wp-content/uploads/2017/08/civic-media-warfare-CSCW2013.pdf},
year = {2013},
date = {2013-02-23},
booktitle = {Proceedings of the 2013 conference on Computer Supported Cooperative Work (CSCW)},
pages = {1443--1452},
publisher = {ACM},
organization = {ACM},
abstract = {In this paper we examine the information sharing practices of people living in cities amid armed conflict. We describe the volume and frequency of microblogging activity on Twitter from four cities afflicted by the Mexican Drug War, showing how citizens use social media to alert one another and to comment on the violence that plagues their communities. We then investigate the emergence of civic media “curators,” individuals who act as “war correspondents” by aggregating and disseminating information to large numbers of people on social media. We conclude by outlining the implications of our observations for the design of civic media systems in wartime.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In this paper we examine the information sharing practices of people living in cities amid armed conflict. We describe the volume and frequency of microblogging activity on Twitter from four cities afflicted by the Mexican Drug War, showing how citizens use social media to alert one another and to comment on the violence that plagues their communities. We then investigate the emergence of civic media “curators,” individuals who act as “war correspondents” by aggregating and disseminating information to large numbers of people on social media. We conclude by outlining the implications of our observations for the design of civic media systems in wartime. |
Emre Kıcıman, Munmun De Choudhury, Scott Counts, Michael Gamon, Bo Thiesson Analyzing Social Media Relationships in Context with Discussion Graphs Proceedings Article In: Proceedings of the Eleventh Workshop on Mining and Learning With Graphs (MLG 2013), ACM, 2013. @inproceedings{kiciman2013analyzing,
title = {Analyzing Social Media Relationships in Context with Discussion Graphs},
author = {Emre Kıcıman and Munmun De Choudhury and Scott Counts and Michael Gamon and Bo Thiesson},
url = {https://kiciman.org/wp-content/uploads/2017/08/mlg2013_21.pdf},
year = {2013},
date = {2013-01-01},
booktitle = {Proceedings of the Eleventh Workshop on Mining and Learning With Graphs (MLG 2013)},
publisher = {ACM},
abstract = {We present discussion graphs, a hyper-graph-based representation of social media discussions that captures both the structural features of the relationships among entities as well as the context of the discussions from which they were derived. Building on previous analyses of social media networks that focus on the strength of relationships between entities, our discussion graphs explicitly include contextual features such as who is participating in the discussions, when and where the discussions are occurring, and what else is being discussed in conjunction. There are two contributions of this work. First, we extend standard hyper-graph representations of networks to include the distribution of contexts surrounding discussions in social media networks. Second, we demonstrate how this context is useful for understanding the results of common graph measures and analyses, such as network centrality and pseudo-cliques, when applied to the analysis of textual social media content. We apply our framework across several domains captured in Twitter, including the mining of people's statements about their locations and activities and discussions of the U.S. 2012 elections.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
We present discussion graphs, a hyper-graph-based representation of social media discussions that captures both the structural features of the relationships among entities as well as the context of the discussions from which they were derived. Building on previous analyses of social media networks that focus on the strength of relationships between entities, our discussion graphs explicitly include contextual features such as who is participating in the discussions, when and where the discussions are occurring, and what else is being discussed in conjunction. There are two contributions of this work. First, we extend standard hyper-graph representations of networks to include the distribution of contexts surrounding discussions in social media networks. Second, we demonstrate how this context is useful for understanding the results of common graph measures and analyses, such as network centrality and pseudo-cliques, when applied to the analysis of textual social media content. We apply our framework across several domains captured in Twitter, including the mining of people's statements about their locations and activities and discussions of the U.S. 2012 elections. |
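A minimal sketch of how a context-annotated hyperedge store along the lines of discussion graphs might look: each hyperedge keys on the set of co-mentioned entities and accumulates distributions over contextual fields. The class and method names, and the choice of context fields, are illustrative assumptions rather than the actual design of the DGT tool.

```python
# Illustrative context-annotated hypergraph store (assumed design, not the DGT tool).
from collections import defaultdict, Counter

class DiscussionGraph:
    def __init__(self):
        # hyperedge key = frozenset of co-mentioned entities -> count plus context distributions
        self.edges = defaultdict(lambda: {"count": 0, "context": defaultdict(Counter)})

    def add_message(self, entities, context):
        edge = self.edges[frozenset(entities)]
        edge["count"] += 1
        for field, value in context.items():          # e.g. {"gender": "f", "hour": 18}
            edge["context"][field][value] += 1

    def conditioned_count(self, entities, field, value):
        """How often these entities co-occur under a given contextual condition."""
        edge = self.edges.get(frozenset(entities))
        return edge["context"][field][value] if edge else 0

# Usage sketch:
# g = DiscussionGraph()
# g.add_message({"aspirin", "headache"}, {"gender": "f", "hour": 9})
# g.conditioned_count({"aspirin", "headache"}, "gender", "f")   # -> 1
```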
Anders B Vinberg, David G Campbell, James R Hamilton, Donald F Ferguson, Emre M Kiciman Highly available large scale network and internet systems Miscellaneous 2013, (US Patent 8,495,557). @misc{vinberg2013highly,
title = {Highly available large scale network and internet systems},
author = {Anders B Vinberg and David G Campbell and James R Hamilton and Donald F Ferguson and Emre M Kiciman},
year = {2013},
date = {2013-00-01},
note = {US Patent 8,495,557},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Kuansan Wang, Emre M Kiciman, Bo-June Hsu, Chun-Kai Wang Federated implicit search Miscellaneous 2013, (US Patent 8,359,311). @misc{wang2013federated,
title = {Federated implicit search},
author = {Kuansan Wang and Emre M Kiciman and Bo-June Hsu and Chun-Kai Wang},
year = {2013},
date = {2013-00-01},
note = {US Patent 8,359,311},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2012
|
Huizhong Duan, Emre Kıcıman, ChengXiang Zhai Click patterns: An empirical representation of complex query intents Proceedings Article In: Proceedings of the 21st ACM Intl. Conference on Information and Knowledge Management (CIKM), pp. 1035–1044, ACM, 2012, ISBN: 978-1-4503-1156-4. @inproceedings{duan2012click,
title = {Click patterns: An empirical representation of complex query intents},
author = {Huizhong Duan and Emre Kıcıman and ChengXiang Zhai},
url = {https://kiciman.org/wp-content/uploads/2017/08/cikm-clickpatterns.pdf},
doi = {10.1145/2396761.2398400},
isbn = {978-1-4503-1156-4},
year = {2012},
date = {2012-10-29},
booktitle = {Proceedings of the 21st ACM Intl. Conference on Information and Knowledge Management (CIKM)},
pages = {1035--1044},
publisher = {ACM},
abstract = {Understanding users' search intents is a critical component of modern search engines. A key limitation of most query log analyses is the assumption that each clicked web result represents one unique intent. However, there are many search tasks, such as comparison shopping or in-depth research, where a user's intent is to explore many documents. In these cases, the assumption of a one-to-one correspondence between clicked documents and user intent breaks down.
To capture and understand such behaviors, we propose the use of click patterns. Click patterns capture the relationship among clicks on search results by treating the set of clicks made by a user as a single unit. We aggregate click patterns together using a hierarchical clustering algorithm to discover the common click patterns. By using click patterns as an empirical representation of user intent, we are able to create a rich representation of mixtures of multiple navigational and informational intents. We analyze real search logs and demonstrate that such complex mixtures of intents do occur in the wild and can be identified using click patterns.
We further demonstrate the usefulness of click patterns by integrating them into a measure of query ambiguity and into a query recommendation task. We show that calculating query ambiguity as the entropy over the distribution of click patterns provides a measure of ambiguity with improved discriminative power, consistency and temporal stability as compared to previous measures of ambiguity. We explore the use of click pattern similarity and click pattern entropy in generating query recommendations and show promising results.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Understanding users' search intents is a critical component of modern search engines. A key limitation of most query log analyses is the assumption that each clicked web result represents one unique intent. However, there are many search tasks, such as comparison shopping or in-depth research, where a user's intent is to explore many documents. In these cases, the assumption of a one-to-one correspondence between clicked documents and user intent breaks down.
To capture and understand such behaviors, we propose the use of click patterns. Click patterns capture the relationship among clicks on search results by treating the set of clicks made by a user as a single unit. We aggregate click patterns together using a hierarchical clustering algorithm to discover the common click patterns. By using click patterns as an empirical representation of user intent, we are able to create a rich representation of mixtures of multiple navigational and informational intents. We analyze real search logs and demonstrate that such complex mixtures of intents do occur in the wild and can be identified using click patterns.
We further demonstrate the usefulness of click patterns by integrating them into a measure of query ambiguity and into a query recommendation task. We show that calculating query ambiguity as the entropy over the distribution of click patterns provides a measure of ambiguity with improved discriminative power, consistency and temporal stability as compared to previous measures of ambiguity. We explore the use of click pattern similarity and click pattern entropy in generating query recommendations and show promising results. |
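As a rough illustration of the entropy-over-click-patterns measure described above, the sketch below treats each user's clicked result set as one pattern and scores a query's ambiguity as the entropy of the empirical pattern distribution; the names and toy data are illustrative, not the paper's code:

```python
# Hedged sketch: query ambiguity as the entropy of the click-pattern distribution.
import math
from collections import Counter

def click_pattern_entropy(sessions):
    """sessions: iterable of sets of clicked result ids for one query."""
    patterns = Counter(frozenset(s) for s in sessions)
    n = sum(patterns.values())
    return sum((c / n) * math.log2(n / c) for c in patterns.values())

# Toy example: a navigational query (everyone clicks result 1) vs. a
# research-style query (users click varied sets of results).
nav = [{1}, {1}, {1}, {1}]
research = [{1, 2}, {2, 3, 5}, {1, 4}, {3}]
print(click_pattern_entropy(nav), click_pattern_entropy(research))
```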
Bimal Viswanath, Emre Kıcıman, Stefan Saroiu Keeping information safe from social networking apps Proceedings Article In: Proceedings of the 2012 ACM Workshop on Workshop on Online Social Networks (WOSN 2012), pp. 49–54, ACM 2012, ISBN: 978-1-4503-1480-0. @inproceedings{viswanath2012keeping,
title = {Keeping information safe from social networking apps},
author = {Bimal Viswanath and Emre Kıcıman and Stefan Saroiu},
url = {https://kiciman.org/wp-content/uploads/2017/08/wosn09-viswanath.pdf},
doi = {10.1145/2342549.2342561},
isbn = {978-1-4503-1480-0},
year = {2012},
date = {2012-08-17},
booktitle = {Proceedings of the 2012 ACM Workshop on Workshop on Online Social Networks (WOSN 2012)},
pages = {49--54},
organization = {ACM},
abstract = {The ability of third-party applications to aggregate and re-purpose personal data is a fundamental privacy weakness in today's social networking platforms. Prior work has proposed sandboxing in a hosted cloud infrastructure to prevent leakage of user information [22]. In this paper, we extend simple sandboxing to allow sharing of information among friends in a social network, and to help application developers securely aggregate user data according to differential privacy properties. Enabling these two key features requires preventing, among other subtleties, a new "Kevin Bacon" attack aimed at aggregating private data through a social network graph. We describe the significant architectural and security implications for the application framework in the Web (JavaScript) application, backend cloud, and user data handling.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
The ability of third-party applications to aggregate and re-purpose personal data is a fundamental privacy weakness in today's social networking platforms. Prior work has proposed sandboxing in a hosted cloud infrastructure to prevent leakage of user information [22]. In this paper, we extend simple sandboxing to allow sharing of information among friends in a social network, and to help application developers securely aggregate user data according to differential privacy properties. Enabling these two key features requires preventing, among other subtleties, a new "Kevin Bacon" attack aimed at aggregating private data through a social network graph. We describe the significant architectural and security implications for the application framework in the Web (JavaScript) application, backend cloud, and user data handling. |
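The differentially private aggregation mentioned above can be illustrated with a generic Laplace-mechanism sketch; this is a standard mechanism shown for intuition, not the paper's specific design, and the function and field names are made up:

```python
# Minimal sketch: answer a counting query over user records with Laplace noise
# calibrated to sensitivity 1, giving an epsilon-differentially-private count.
import random

def dp_count(records, predicate, epsilon=0.5):
    """Return a noisy count of records satisfying predicate (epsilon-DP)."""
    true_count = sum(1 for r in records if predicate(r))
    # Difference of two Exponential(epsilon) draws is Laplace with scale 1/epsilon.
    noise = random.expovariate(epsilon) - random.expovariate(epsilon)
    return true_count + noise

users = [{"likes_hiking": True}, {"likes_hiking": False}, {"likes_hiking": True}]
print(dp_count(users, lambda u: u["likes_hiking"]))
```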
Andrés Monroy-Hernández, Emre Kıcıman, danah boyd, Scott Counts Tweeting the Drug War: Empowerment, Intimidation, and Regulation in Social Media Proceedings Article In: In Human Computer Interaction International Conference (HCIC), 2012. @inproceedings{monroy2012tweeting,
title = {Tweeting the Drug War: Empowerment, Intimidation, and Regulation in Social Media},
author = {Andrés Monroy-Hernández and Emre Kıcıman and danah boyd and Scott Counts},
url = {https://kiciman.org/wp-content/uploads/2017/08/tweeting-war.pdf},
year = {2012},
date = {2012-06-25},
booktitle = {In Human Computer Interaction International Conference (HCIC)},
journal = {HCIC},
abstract = {In this paper, we describe how people living in armed-conflict environments use social media as a participatory news platform. We investigate this by analyzing the microblogging practices of people living amid the Mexican Drug War. This paper provides a descriptive analysis of the phenomenon by mixing quantitative observations, content analysis, interviews, and case studies. We characterize the volume, temporal attributes, and information sharing methods. We focus on how citizens use social media to alert and disseminate information about acute violent events, and to interact with other people in their localities. We describe how social media might start to function in lieu of damaged state and news media apparatuses, in particular, through the emergence of communities that congregate around hashtags and the citizens that curate them. Finally, we explore the tensions among citizens, media actors, and the government in light of generalized violence and distrust in institutions and citizens. We end by outlining the implications for system design and governmental intervention.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In this paper, we describe how people living in armed-conflict environments use social media as a participatory news platform. We investigate this by analyzing the microblogging practices of people living amid the Mexican Drug War. This paper provides a descriptive analysis of the phenomenon by mixing quantitative observations, content analysis, interviews, and case studies. We characterize the volume, temporal attributes, and information sharing methods. We focus on how citizens use social media to alert and disseminate information about acute violent events, and to interact with other people in their localities. We describe how social media might start to function in lieu of damaged state and news media apparatuses, in particular, through the emergence of communities that congregate around hashtags and the citizens that curate them. Finally, we explore the tensions among citizens, media actors, and the government in light of generalized violence and distrust in institutions and citizens. We end by outlining the implications for system design and governmental intervention. |
Emre Kıcıman OMG, i have to tweet that! a study of factors that influence tweet rates Proceedings Article In: Proceedings of the Sixth International AAAI Conference on Weblogs and Social Media (ICWSM), AAAI, 2012. @inproceedings{kiciman2012omg,
title = {OMG, i have to tweet that! a study of factors that influence tweet rates},
author = {Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/reportingbias_4659-21924-1-PB.pdf},
year = {2012},
date = {2012-06-04},
booktitle = {Proceedings of the Sixth International AAAI Conference on Weblogs and Social Media (ICWSM)},
publisher = {AAAI},
abstract = {Many studies have shown that social data such as tweets are a rich source of information about the real world including, for example, insights into health trends. A key limitation when analyzing Twitter data, however, is that it depends on people self reporting their own behaviors and observations. In this paper, we present a large scale quantitative analysis of some of the factors that influence self reporting bias. In our study, we compare a year of tweets about weather events to ground truth knowledge about actual weather occurrences. For each weather event we calculate how extreme, how expected, and how big a change the event represents. We calculate the extent to which these factors can explain the daily variations in tweet rates about weather events. We find that we can build global models that take into account basic weather information, together with extremeness, expectation and change calculations to account for over 40% of the variability in tweet rates. We build location specific (i.e., a model per each metropolitan area) models that account for an average of 70% of the variability in tweet rates.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Many studies have shown that social data such as tweets are a rich source of information about the real world including, for example, insights into health trends. A key limitation when analyzing Twitter data, however, is that it depends on people self reporting their own behaviors and observations. In this paper, we present a large scale quantitative analysis of some of the factors that influence self reporting bias. In our study, we compare a year of tweets about weather events to ground truth knowledge about actual weather occurrences. For each weather event we calculate how extreme, how expected, and how big a change the event represents. We calculate the extent to which these factors can explain the daily variations in tweet rates about weather events. We find that we can build global models that take into account basic weather information, together with extremeness, expectation and change calculations to account for over 40% of the variability in tweet rates. We build location specific (i.e., a model per each metropolitan area) models that account for an average of 70% of the variability in tweet rates. |
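A minimal sketch of the style of analysis in this abstract: regress daily tweet rates on extremeness, expectation, and change features, then report the variance explained (R^2). The synthetic data and feature names below are illustrative only, not the study's dataset:

```python
# Toy regression: how much of the variation in daily tweet rates can weather
# features explain? R^2 here plays the role of the 40%/70% figures above.
import numpy as np

rng = np.random.default_rng(0)
n = 365
extremeness = rng.normal(size=n)
expectation = rng.normal(size=n)
change = rng.normal(size=n)
# Synthetic "tweet rate" driven mostly by extremeness and change, plus noise.
tweet_rate = 3.0 * extremeness + 1.5 * change + rng.normal(scale=2.0, size=n)

X = np.column_stack([np.ones(n), extremeness, expectation, change])
coef, *_ = np.linalg.lstsq(X, tweet_rate, rcond=None)
pred = X @ coef
r2 = 1 - np.sum((tweet_rate - pred) ** 2) / np.sum((tweet_rate - tweet_rate.mean()) ** 2)
print(f"variance explained: {r2:.2f}")
```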
Danah Boyd, Gilad Lotan, Paul Oka, Emre Mehmet Kiciman, Chun-Kai Wang Presenting supplemental content in context Miscellaneous 2012, (US Patent App. 13/602,015). @misc{boyd2012presenting,
title = {Presenting supplemental content in context},
author = {Danah Boyd and Gilad Lotan and Paul Oka and Emre Mehmet Kiciman and Chun-Kai Wang},
year = {2012},
date = {2012-00-01},
note = {US Patent App. 13/602,015},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2010
|
George Danezis, Tuomas Aura, Shuo Chen, Emre Kıcıman How to share your favourite search results while preserving privacy and quality Proceedings Article In: In Proceedings of 10th Intl. Symp. on Privacy Enhancing Technologies (PETS), pp. 273–290, Springer Berlin/Heidelberg 2010. @inproceedings{danezis2010share,
title = {How to share your favourite search results while preserving privacy and quality},
author = {George Danezis and Tuomas Aura and Shuo Chen and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/privsearch.pdf},
year = {2010},
date = {2010-07-21},
booktitle = {In Proceedings of 10th Intl. Symp. on Privacy Enhancing Technologies (PETS)},
pages = {273--290},
organization = {Springer Berlin/Heidelberg},
abstract = {Personalised social search is a promising avenue to increase the relevance of search engine results by making use of recommendations made by friends in a social network. More generally a whole class of systems take user preferences, aggregate and process them, before providing a view of the result to others in a social network. Yet, those systems present privacy risks, and could be used by spammers to propagate their malicious preferences. We present a general framework to preserve privacy while maximizing the benefit of sharing information in a social network, as well as a concrete proposal making use of cohesive social group concepts from social network analysis. We show that privacy can be guaranteed in a k-anonymity manner, and disruption through spam is kept to a minimum in a real world social network.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Personalised social search is a promising avenue to increase the relevance of search engine results by making use of recommendations made by friends in a social network. More generally a whole class of systems take user preferences, aggregate and process them, before providing a view of the result to others in a social network. Yet, those systems present privacy risks, and could be used by spammers to propagate their malicious preferences. We present a general framework to preserve privacy while maximizing the benefit of sharing information in a social network, as well as a concrete proposal making use of cohesive social group concepts from social network analysis. We show that privacy can be guaranteed in a k-anonymity manner, and disruption through spam is kept to a minimum in a real world social network. |
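A simplified sketch of the k-anonymity flavor of this proposal: only release a recommended (query, result) pair to the wider group if at least k distinct users endorsed it, so no individual's preference is exposed. The threshold rule below stands in for the paper's cohesive-group construction, and the data is made up:

```python
# Hedged sketch: k-anonymous sharing of search recommendations.
from collections import defaultdict

def shareable_results(endorsements, k=5):
    """endorsements: iterable of (user_id, query, result_url)."""
    supporters = defaultdict(set)
    for user, query, url in endorsements:
        supporters[(query, url)].add(user)
    # Only pairs backed by at least k distinct users are safe to share.
    return {key for key, users in supporters.items() if len(users) >= k}

votes = [(u, "best hiking boots", "boots.example.com") for u in range(6)]
votes += [(1, "rare disease forum", "forum.example.com")]
print(shareable_results(votes, k=5))  # only the widely endorsed result survives
```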
Emre Kıcıman Language differences and metadata features on Twitter Proceedings Article In: In Proceedings of Web N-gram Workshop, at SIGIR, pp. 47, ACM, 2010. @inproceedings{kiciman2010language,
title = {Language differences and metadata features on Twitter},
author = {Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/Language20Differences20and20Metadata20Features20on20Twitter.pdf},
year = {2010},
date = {2010-07-19},
booktitle = {In Proceedings of Web N-gram Workshop, at SIGIR},
pages = {47},
publisher = {ACM},
abstract = {In the past several years, microblogging services like Twitter and Facebook have become a popular method of communication, allowing users to disseminate and gather information to and from hundreds or thousands (or even millions) of people, often in real-time. As much of the content on microblogging services is publicly accessible, we have recently seen many secondary services being built atop them, including services that perform significant content analysis, such as real-time search engines and trend analysis services. With the eventual goal of building more accurate and less expensive models of microblog streams, this paper investigates the degree to which language variance is related to the metadata of microblog content. We hypothesize that if a strong relationship exists between metadata features and language then we will be able to use this metadata as a trivial classifier to match individual messages with specialized, more accurate language models. To investigate the validity of this hypothesis, we analyze a corpus of over 72M Twitter messages, building language models conditioned on a variety of available message metadata.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In the past several years, microblogging services like Twitter and Facebook have become a popular method of communication, allowing users to disseminate and gather information to and from hundreds or thousands (or even millions) of people, often in real-time. As much of the content on microblogging services is publicly accessible, we have recently seen many secondary services being built atop them, including services that perform significant content analysis, such as real-time search engines and trend analysis services. With the eventual goal of building more accurate and less expensive models of microblog streams, this paper investigates the degree to which language variance is related to the metadata of microblog content. We hypothesize that if a strong relationship exists between metadata features and language then we will be able to use this metadata as a trivial classifier to match individual messages with specialized, more accurate language models. To investigate the validity of this hypothesis, we analyze a corpus of over 72M Twitter messages, building language models conditioned on a variety of available message metadata. |
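To illustrate the hypothesis, the toy sketch below partitions messages by a metadata feature, trains an add-one-smoothed unigram model per partition, and compares perplexity against a single global model; the data, feature, and smoothing are toy-level and not the paper's setup:

```python
# Hedged sketch: metadata-conditioned unigram language models vs. a global model.
import math
from collections import Counter

def unigram_model(messages):
    counts = Counter(w for m in messages for w in m.split())
    total, vocab = sum(counts.values()), len(counts)
    return lambda w: (counts[w] + 1) / (total + vocab)  # add-one smoothing

def perplexity(model, messages):
    words = [w for m in messages for w in m.split()]
    logp = sum(math.log2(model(w)) for w in words)
    return 2 ** (-logp / len(words))

with_url = ["check this link out", "new post link inside"]     # messages containing a URL
no_url = ["so tired today", "good morning everyone"]           # messages without a URL
global_lm = unigram_model(with_url + no_url)
url_lm = unigram_model(with_url)
print(perplexity(global_lm, with_url), perplexity(url_lm, with_url))
```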
Emre Kıcıman, Benjamin Livshits, Madanlal Musuvathi, Kevin C Webb Fluxo: a system for internet service programming by non-expert developers Proceedings Article In: Proceedings of the 1st ACM Symposium on Cloud Computing (SOCC), pp. 107–118, ACM 2010, ISBN: 978-1-4503-0036-0. @inproceedings{kiciman2010fluxo,
title = {Fluxo: a system for internet service programming by non-expert developers},
author = {Emre Kıcıman and Benjamin Livshits and Madanlal Musuvathi and Kevin C Webb},
url = {https://kiciman.org/wp-content/uploads/2017/08/socc10_final.pdf},
doi = {10.1145/1807128.1807147},
isbn = {978-1-4503-0036-0},
year = {2010},
date = {2010-06-10},
booktitle = {Proceedings of the 1st ACM Symposium on Cloud Computing (SOCC)},
pages = {107--118},
organization = {ACM},
abstract = {Over the last 10-15 years, our industry has developed and deployed many large-scale Internet services, from e-commerce to social networking sites, all facing common challenges in latency, reliability, and scalability. Over time, a relatively small number of architectural patterns have emerged to address these challenges, such as tiering, caching, partitioning, and pre- or post-processing compute intensive tasks. Unfortunately, following these patterns requires developers to have a deep understanding of the trade-offs involved in these patterns as well as an end-to-end understanding of their own system and its expected workloads. The result is that non-expert developers have a hard time applying these patterns in their code, leading to low-performing, highly suboptimal applications.
In this paper, we propose FLUXO, a system that separates an Internet service's logical functionality from the architectural decisions made to support performance, scalability, and reliability. FLUXO achieves this separation through the use of a restricted programming language designed 1) to limit a developer's ability to write programs that are incompatible with widely used Internet service architectural patterns; and 2) to simplify the analysis needed to identify how architectural patterns should be applied to programs. Because architectural patterns are often highly dependent on application performance, workloads and data distributions, our platform captures such data as a runtime profile of the application and makes it available for use when determining how to apply architectural patterns. This separation makes service development accessible to non-experts by allowing them to focus on application features and leaving complicated architectural optimizations to experts writing application-agnostic, profile-guided optimization tools.
To evaluate FLUXO, we show how a variety of architectural patterns can be expressed as transformations applied to FLUXO programs. Even simple heuristics for automatically applying these optimizations can show reductions in latency ranging from 20-90% without requiring special effort from the application developer. We also demonstrate how a simple shared-nothing tiering and replication pattern is able to scale our test suite, a web-based IM, email, and addressbook application.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Over the last 10-15 years, our industry has developed and deployed many large-scale Internet services, from e-commerce to social networking sites, all facing common challenges in latency, reliability, and scalability. Over time, a relatively small number of architectural patterns have emerged to address these challenges, such as tiering, caching, partitioning, and pre- or post-processing compute intensive tasks. Unfortunately, following these patterns requires developers to have a deep understanding of the trade-offs involved in these patterns as well as an end-to-end understanding of their own system and its expected workloads. The result is that non-expert developers have a hard time applying these patterns in their code, leading to low-performing, highly suboptimal applications.
In this paper, we propose FLUXO, a system that separates an Internet service's logical functionality from the architectural decisions made to support performance, scalability, and reliability. FLUXO achieves this separation through the use of a restricted programming language designed 1) to limit a developer's ability to write programs that are incompatible with widely used Internet service architectural patterns; and 2) to simplify the analysis needed to identify how architectural patterns should be applied to programs. Because architectural patterns are often highly dependent on application performance, workloads and data distributions, our platform captures such data as a runtime profile of the application and makes it available for use when determining how to apply architectural patterns. This separation makes service development accessible to non-experts by allowing them to focus on application features and leaving complicated architectural optimizations to experts writing application-agnostic, profile-guided optimization tools.
To evaluate FLUXO, we show how a variety of architectural patterns can be expressed as transformations applied to FLUXO programs. Even simple heuristics for automatically applying these optimizations can show reductions in latency ranging from 20-90% without requiring special effort from the application developer. We also demonstrate how a simple shared-nothing tiering and replication pattern is able to scale our test suite, a web-based IM, email, and addressbook application. |
Min-John Lee, David James Gemmell, Ashok K Chandra, Vincent Vergonjeanne, Emre M Kiciman Adjusting search results based on user social profiles Miscellaneous 2010, (US Patent App. 12/823,587). @misc{lee2010adjusting,
title = {Adjusting search results based on user social profiles},
author = {Min-John Lee and David James Gemmell and Ashok K Chandra and Vincent Vergonjeanne and Emre M Kiciman},
year = {2010},
date = {2010-06-01},
note = {US Patent App. 12/823,587},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
David J Gemmell, Min-John Lee, Ashok Chandra, Vincent Vergonjeanne, Emre M Kiciman Advertising products to groups within social networks Miscellaneous 2010, (US Patent App. 12/823,609). @misc{gemmell2010advertising,
title = {Advertising products to groups within social networks},
author = {David J Gemmell and Min-John Lee and Ashok Chandra and Vincent Vergonjeanne and Emre M Kiciman},
year = {2010},
date = {2010-06-01},
note = {US Patent App. 12/823,609},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Danah Boyd, Gilad Lotan, Paul Oka, Emre Mehmet Kiciman, Chun-Kai Wang Presenting supplemental content in context Miscellaneous 2010, (US Patent App. 12/795,392). @misc{boyd2010presenting,
title = {Presenting supplemental content in context},
author = {Danah Boyd and Gilad Lotan and Paul Oka and Emre Mehmet Kiciman and Chun-Kai Wang},
year = {2010},
date = {2010-06-01},
note = {US Patent App. 12/795,392},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Romain J Thibaux, Emre M Kiciman, David A Maltz, John C Platt Strategies for identifying anomalies in time-series data Miscellaneous 2010, (US Patent 7,716,011). @misc{thibaux2010strategies,
title = {Strategies for identifying anomalies in time-series data},
author = {Romain J Thibaux and Emre M Kiciman and David A Maltz and John C Platt},
year = {2010},
date = {2010-05-01},
note = {US Patent 7,716,011},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Mehmet Kiciman, Chun-Kai Wang, Sreeharsha Kamireddy, Silviu-Petru Cucerzan Social home page Miscellaneous 2010, (US Patent App. 12/761,666). @misc{kiciman2010social,
title = {Social home page},
author = {Emre Mehmet Kiciman and Chun-Kai Wang and Sreeharsha Kamireddy and Silviu-Petru Cucerzan},
year = {2010},
date = {2010-04-01},
note = {US Patent App. 12/761,666},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2009
|
Emre Kıcıman, Benjamin Livshits, Madanlal Musuvathi CatchAndRetry: Extending exceptions to handle distributed system failures and recovery Proceedings Article In: Proceedings of the Fifth Workshop on Programming Languages and Operating Systems, pp. 4, ACM 2009. @inproceedings{kiciman2009catchandretry,
title = {CatchAndRetry: Extending exceptions to handle distributed system failures and recovery},
author = {Emre Kıcıman and Benjamin Livshits and Madanlal Musuvathi},
year = {2009},
date = {2009-10-11},
booktitle = {Proceedings of the Fifth Workshop on Programming Languages and Operating Systems},
pages = {4},
organization = {ACM},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Emre Kıcıman, Benjamin Livshits, Madanlal Musuvathi FLUXO: A Simple Service Compiler Proceedings Article In: Workshop on Hot Topics in Operating Systems (HotOS), USENIX, 2009. @inproceedings{kiciman2009fluxo,
title = {FLUXO: A Simple Service Compiler},
author = {Emre Kıcıman and Benjamin Livshits and Madanlal Musuvathi},
url = {https://kiciman.org/wp-content/uploads/2017/08/hotos09.pdf},
year = {2009},
date = {2009-05-18},
booktitle = {Workshop on Hot Topics in Operating Systems (HotOS)},
publisher = {USENIX},
abstract = {In this paper, we propose FLUXO, a system that separates an Internet service’s logical functionality from the architectural decisions made to support performance, scalability, and reliability. FLUXO achieves this separation through three mechanisms: 1) a coarse-grained dataflow-based programming model; 2) detailed runtime request tracing to capture workload distributions, performance behavior, and resource requirements; and 3) a set of analysis techniques that determine how to apply simple, parameterized dataflow transformations to optimize the service architecture for performance, scalability, and reliability. In this paper, we describe our vision for how to make Internet services easier to construct, and show how a variety of Internet service performance optimizations may be expressed as transformations applied to FLUXO programs.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
In this paper, we propose FLUXO, a system that separates an Internet service’s logical functionality from the architectural decisions made to support performance, scalability, and reliability. FLUXO achieves this separation through three mechanisms: 1) a coarse-grained dataflow-based programming model; 2) detailed runtime request tracing to capture workload distributions, performance behavior, and resource requirements; and 3) a set of analysis techniques that determine how to apply simple, parameterized dataflow transformations to optimize the service architecture for performance, scalability, and reliability. In this paper, we describe our vision for how to make Internet services easier to construct, and show how a variety of Internet service performance optimizations may be expressed as transformations applied to FLUXO programs. |
Alexander Rasmussen, Emre Kıcıman, Benjamin Livshits, Madanlal Musuvathi Improving the responsiveness of internet services with automatic cache placement Proceedings Article In: Proceedings of the 4th ACM European Conference on Computer Systems (EUROSYS), pp. 27–32, ACM 2009. @inproceedings{rasmussen2009improving,
title = {Improving the responsiveness of internet services with automatic cache placement},
author = {Alexander Rasmussen and Emre Kıcıman and Benjamin Livshits and Madanlal Musuvathi},
url = {https://kiciman.org/wp-content/uploads/2017/08/euro101s-rasmussen.pdf},
year = {2009},
date = {2009-04-01},
booktitle = {Proceedings of the 4th ACM European Conference on Computer Systems (EUROSYS)},
pages = {27--32},
organization = {ACM},
abstract = {The backends of today’s Internet services rely heavily on caching at various layers both to provide faster service to common requests and to reduce load on back-end components. Cache placement is especially challenging given the diversity of workloads handled by widely deployed Internet services. This paper presents FLUXO, an analysis technique that automatically optimizes cache placement. Our experiments have shown that near-optimal cache placements vary significantly based on input distribution.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
The backends of today’s Internet services rely heavily on caching at various layers both to provide faster service to common requests and to reduce load on back-end components. Cache placement is especially challenging given the diversity of workloads handled by widely deployed Internet services. This paper presents FLUXO, an analysis technique that automatically optimizes cache placement. Our experiments have shown that near-optimal cache placements vary significantly based on input distribution. |
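A toy version of profile-guided cache placement in a linear pipeline: score each node by the expected work a cache on its output would avoid, using profiled per-node costs and the repetition rate of its inputs. This heuristic and the example pipeline are illustrative, not FLUXO's actual analysis:

```python
# Hedged sketch: pick the cache location with the largest expected saving.
def best_cache_placement(nodes, traces):
    """nodes: list of (name, cost_ms); traces: per-request tuple of input keys, one per node."""
    best = None
    for i, (name, cost) in enumerate(nodes):
        keys = [t[i] for t in traces]
        hit_rate = 1 - len(set(keys)) / len(keys)   # repeated inputs would hit the cache
        saved = hit_rate * cost                     # expected work avoided per request
        if best is None or saved > best[1]:
            best = (name, saved)
    return best

nodes = [("parse", 2), ("fetch_profile", 40), ("rank", 15)]
traces = [("q1", "alice", "ctx1"), ("q2", "alice", "ctx2"),
          ("q3", "alice", "ctx3"), ("q1", "bob", "ctx4")]
print(best_cache_placement(nodes, traces))  # -> node whose cached output saves the most time
```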
Romain Thibaux, Emre Kiciman, David A Maltz Grouping failures to infer common causes Miscellaneous 2009, (US Patent 7,529,974). @misc{thibaux2009grouping,
title = {Grouping failures to infer common causes},
author = {Romain Thibaux and Emre Kiciman and David A Maltz},
year = {2009},
date = {2009-01-01},
note = {US Patent 7,529,974},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Mehmet Kiciman, Wissam Kazan, Chun-Kai Wang, Aaron C Hoff, Felipe Luis Naranjo, Francislav P Penov Social browsing Miscellaneous 2009, (US Patent App. 12/614,457). @misc{kiciman2009social,
title = {Social browsing},
author = {Emre Mehmet Kiciman and Wissam Kazan and Chun-Kai Wang and Aaron C Hoff and Felipe Luis Naranjo and Francislav P Penov},
year = {2009},
date = {2009-01-01},
note = {US Patent App. 12/614,457},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2008
|
John C Platt, Emre Kıcıman, David A. Maltz Fast variational inference for large-scale internet diagnosis Proceedings Article In: In Proceedings of The Conference on Neural Information Processing Systems (NIPS), pp. 1169–1176, 2008. @inproceedings{kiciman2008fast,
title = {Fast variational inference for large-scale internet diagnosis},
author = {John C Platt and Emre Kıcıman and David A. Maltz},
url = {https://kiciman.org/wp-content/uploads/2017/08/vgd.pdf},
year = {2008},
date = {2008-12-01},
booktitle = {In Proceedings of The Conference on Neural Information Processing Systems (NIPS)},
pages = {1169--1176},
abstract = {Web servers on the Internet need to maintain high reliability, but the cause of intermittent failures of web transactions is non-obvious. We use approximate Bayesian inference to diagnose problems with web services. This diagnosis problem is far larger than any previously attempted: it requires inference of 10^4 possible faults from 10^5 observations. Further, such inference must be performed in less than a second. Inference can be done at this speed by combining a mean-field variational approximation and the use of stochastic gradient descent to optimize a variational cost function. We use this fast inference to diagnose a time series of anomalous HTTP requests taken from a real web service. The inference is fast enough to analyze network logs with billions of entries in a matter of hours.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Web servers on the Internet need to maintain high reliability, but the cause of intermittent failures of web transactions is non-obvious. We use approximate Bayesian inference to diagnose problems with web services. This diagnosis problem is far larger than any previously attempted: it requires inference of 10^4 possible faults from 10^5 observations. Further, such inference must be performed in less than a second. Inference can be done at this speed by combining a mean-field variational approximation and the use of stochastic gradient descent to optimize a variational cost function. We use this fast inference to diagnose a time series of anomalous HTTP requests taken from a real web service. The inference is fast enough to analyze network logs with billions of entries in a matter of hours. |
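The paper couples a mean-field variational approximation with stochastic gradient descent to reach that scale; the toy below conveys only the mean-field idea, fitting a factorized Bernoulli posterior to a two-fault noisy-OR model by exact enumeration and grid search. The model structure, parameters, and observations are all illustrative:

```python
# Hedged sketch: mean-field approximation of a tiny fault-diagnosis posterior.
import itertools, math

prior = [0.05, 0.05]                 # prior probability each fault is active
link, leak = 0.9, 0.01               # noisy-OR: link strength and background failure rate
observed_fail = [1, 1, 0]            # outcomes of three requests (1 = failed)
affects = [[1, 0], [1, 1], [0, 1]]   # which faults each request depends on

def joint(f):
    """Unnormalized joint probability of fault vector f and the observations."""
    p = math.prod(prior[j] if f[j] else 1 - prior[j] for j in range(2))
    for fail, row in zip(observed_fail, affects):
        p_ok = (1 - leak) * math.prod((1 - link) if (row[j] and f[j]) else 1.0 for j in range(2))
        p *= (1 - p_ok) if fail else p_ok
    return p

post = {f: joint(f) for f in itertools.product([0, 1], repeat=2)}
Z = sum(post.values())

def kl(q):
    """KL(q || posterior) for a factorized Bernoulli q = (q0, q1)."""
    total = 0.0
    for f, p in post.items():
        qf = math.prod(q[j] if f[j] else 1 - q[j] for j in range(2))
        total += qf * math.log(qf / (p / Z))
    return total

grid = [i / 50 for i in range(1, 50)]
best = min(itertools.product(grid, repeat=2), key=kl)   # grid search replaces SGD in this toy
print("mean-field marginals q(fault active):", best)
```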
Benjamin Livshits, Emre Kıcıman Doloto: Code splitting for network-bound Web 2.0 applications Proceedings Article In: Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of software engineering, pp. 350–360, ACM 2008. @inproceedings{livshits2008doloto,
title = {Doloto: Code splitting for network-bound Web 2.0 applications},
author = {Benjamin Livshits and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/fse08.pdf},
year = {2008},
date = {2008-11-01},
booktitle = {Proceedings of the 16th ACM SIGSOFT International Symposium on Foundations of software engineering},
pages = {350--360},
organization = {ACM},
abstract = {Modern Web 2.0 applications, such as GMail, Live Maps, Facebook and many others, use a combination of Dynamic HTML, JavaScript and other Web browser technologies commonly referred to as AJAX to push application execution to the client web browser. This improves the responsiveness of these network-bound applications, but the shift of application execution from a back-end server to the client also often dramatically increases the amount of code that must first be downloaded to the browser. This creates an unfortunate Catch-22: to create responsive distributed Web 2.0 applications developers move code to the client, but for an application to be responsive, the code must first be transferred there, which takes time.
In this paper, we present DOLOTO, an optimization tool for Web 2.0 applications. DOLOTO analyzes application workloads and automatically rewrites the existing application code to introduce dynamic code loading. After being processed by DOLOTO, an application will initially transfer only the portion of code necessary for application initialization. The rest of the application’s code is replaced by short stubs—their actual implementations are transferred lazily in the background or, at the latest, on-demand on first execution of a particular application feature. Moreover, code that is rarely executed is rarely downloaded to the user browser. Because DOLOTO significantly speeds up the application startup and since subsequent code download is interleaved with application execution, applications rewritten with DOLOTO appear much more responsive to the end-user.
To demonstrate the effectiveness of DOLOTO in practice, we have performed experiments on five large widely-used Web 2.0 applications. DOLOTO reduces the size of application code download by hundreds of kilobytes or as much as 50% of the original download size. The time to download and begin interacting with large applications is reduced by 20-40% depending on the application and wide-area network conditions. DOLOTO especially shines on wireless and mobile connections, which are becoming increasingly important in today’s computing environments. While we performed our experiments on existing large JavaScript applications, techniques outlined in this paper can be readily incorporated into the next generation of distributing compilers such as Silverlight and Volta.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Modern Web 2.0 applications, such as GMail, Live Maps, Facebook and many others, use a combination of Dynamic HTML, JavaScript and other Web browser technologies commonly referred to as AJAX to push application execution to the client web browser. This improves the responsiveness of these network-bound applications, but the shift of application execution from a back-end server to the client also often dramatically increases the amount of code that must first be downloaded to the browser. This creates an unfortunate Catch-22: to create responsive distributed Web 2.0 applications developers move code to the client, but for an application to be responsive, the code must first be transferred there, which takes time.
In this paper, we present DOLOTO, an optimization tool for Web 2.0 applications. DOLOTO analyzes application workloads and automatically rewrites the existing application code to introduce dynamic code loading. After being processed by DOLOTO, an application will initially transfer only the portion of code necessary for application initialization. The rest of the application’s code is replaced by short stubs—their actual implementations are transferred lazily in the background or, at the latest, on-demand on first execution of a particular application feature. Moreover, code that is rarely executed is rarely downloaded to the user browser. Because DOLOTO significantly speeds up the application startup and since subsequent code download is interleaved with application execution, applications rewritten with DOLOTO appear much more responsive to the end-user.
To demonstrate the effectiveness of DOLOTO in practice, we have performed experiments on five large widely-used Web 2.0 applications. DOLOTO reduces the size of application code download by hundreds of kilobytes or as much as 50% of the original download size. The time to download and begin interacting with large applications is reduced by 20-40% depending on the application and wide-area network conditions. DOLOTO especially shines on wireless and mobile connections, which are becoming increasingly important in today’s computing environments. While we performed our experiments on existing large JavaScript applications, techniques outlined in this paper can be readily incorporated into the next generation of distributing compilers such as Silverlight and Volta. |
Emre Mehmet Kiciman, Darko Kirovski Social network referral coupons Miscellaneous 2008, (US Patent App. 12/270,974). @misc{kiciman2008social,
title = {Social network referral coupons},
author = {Emre Mehmet Kiciman and Darko Kirovski},
year = {2008},
date = {2008-11-01},
note = {US Patent App. 12/270,974},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Benjamin Livshits, Emre M Kiciman, Alexander C Rasmussen, Madanlal Musuvathi Developing and Maintaining High Performance Network Services Miscellaneous 2008, (US Patent App. 12/335,799). @misc{livshits2008developing,
title = {Developing and Maintaining High Performance Network Services},
author = {Benjamin Livshits and Emre M Kiciman and Alexander C Rasmussen and Madanlal Musuvathi},
year = {2008},
date = {2008-00-01},
note = {US Patent App. 12/335,799},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Shuo Chen, Emre M Kiciman, Yi-Min Wang Customizing Search Results Miscellaneous 2008, (US Patent App. 12/137,508). @misc{chen2008customizing,
title = {Customizing Search Results},
author = {Shuo Chen and Emre M Kiciman and Yi-Min Wang},
year = {2008},
date = {2008-00-01},
note = {US Patent App. 12/137,508},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2007
|
Emre Kıcıman, Benjamin Livshits AjaxScope: a platform for remotely monitoring the client-side behavior of Web 2.0 applications Proceedings Article In: In Proceedings of the 21st ACM Symp. on Operating Systems Principles (SOSP'07), pp. 17–30, ACM 2007, ISBN: 978-1-59593-591-5. @inproceedings{kiciman2007ajaxscope,
title = {AjaxScope: a platform for remotely monitoring the client-side behavior of Web 2.0 applications},
author = {Emre Kıcıman and Benjamin Livshits},
url = {https://kiciman.org/wp-content/uploads/2017/08/sosp095-kiciman.pdf},
doi = {10.1145/1294261.1294264},
isbn = {978-1-59593-591-5},
year = {2007},
date = {2007-10-01},
booktitle = {In Proceedings of the 21st ACM Symp. on Operating Systems Principles (SOSP'07)},
volume = {41},
number = {6},
pages = {17--30},
organization = {ACM},
abstract = {The rise of the software-as-a-service paradigm has led to the development of a new breed of sophisticated, interactive applications often called Web 2.0. While web applications have become larger and more complex, web application developers today have little visibility into the end-to-end behavior of their systems. This paper presents AjaxScope, a dynamic instrumentation platform that enables cross-user monitoring and just-in-time control of web application behavior on end-user desktops. AjaxScope is a proxy that performs on-the-fly parsing and instrumentation of JavaScript code as it is sent to users’ browsers. AjaxScope provides facilities for distributed and adaptive instrumentation in order to reduce the client-side overhead, while giving fine-grained visibility into the code-level behavior of web applications. We present a variety of policies demonstrating the power of AjaxScope, ranging from simple error reporting and performance profiling to more complex memory leak detection and optimization analyses. We also apply our prototype to analyze the behavior of over 90 Web 2.0 applications and sites that use large amounts of JavaScript.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
The rise of the software-as-a-service paradigm has led to the development of a new breed of sophisticated, interactive applications often called Web 2.0. While web applications have become larger and more complex, web application developers today have little visibility into the end-to-end behavior of their systems. This paper presents AjaxScope, a dynamic instrumentation platform that enables cross-user monitoring and just-in-time control of web application behavior on end-user desktops. AjaxScope is a proxy that performs on-the-fly parsing and instrumentation of JavaScript code as it is sent to users’ browsers. AjaxScope provides facilities for distributed and adaptive instrumentation in order to reduce the client-side overhead, while giving fine-grained visibility into the code-level behavior of web applications. We present a variety of policies demonstrating the power of AjaxScope, ranging from simple error reporting and performance profiling to more complex memory leak detection and optimization analyses. We also apply our prototype to analyze the behavior of over 90 Web 2.0 applications and sites that use large amounts of JavaScript. |
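AjaxScope itself rewrites JavaScript in a proxy; as a language-neutral sketch of the distributed, adaptive instrumentation policy, the snippet below wraps a function with timing and error reporting for only a sampled fraction of users, so most users pay no monitoring overhead. Function and parameter names are illustrative, not AjaxScope's API:

```python
# Hedged sketch: sampled, per-user instrumentation wrapping.
import random, time, functools

def instrument_for_user(user_id, sample_rate=0.1):
    """Return a decorator that instruments the function only for sampled users."""
    def decorate(fn):
        if random.Random(user_id).random() >= sample_rate:
            return fn  # most users run the original, uninstrumented code
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            try:
                return fn(*args, **kwargs)
            except Exception as exc:
                print(f"[report] user={user_id} fn={fn.__name__} error={exc!r}")
                raise
            finally:
                print(f"[report] user={user_id} fn={fn.__name__} "
                      f"ms={(time.perf_counter() - start) * 1000:.2f}")
        return wrapper
    return decorate

@instrument_for_user(user_id=42, sample_rate=1.0)  # force instrumentation for the demo
def render_page(n):
    return sum(range(n))

render_page(100000)
```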
Emre Kıcıman, Helen J Wang Live monitoring: using adaptive instrumentation and analysis to debug and maintain Web applications Proceedings Article In: In Proceedings of the 11th Workshop on Hot Topics in Operating Systems (HotOS XI), USENIX, 2007. @inproceedings{kiciman2007live,
title = {Live monitoring: using adaptive instrumentation and analysis to debug and maintain Web applications},
author = {Emre Kıcıman and Helen J Wang},
url = {https://kiciman.org/wp-content/uploads/2017/08/webappmonitoring-1.pdf},
year = {2007},
date = {2007-05-01},
booktitle = {In Proceedings of the 11th Workshop on Hot Topics in Operating Systems (HotOS XI)},
publisher = {USENIX},
abstract = {AJAX-based web applications are enabling the next generation of rich, client-side web applications, but today’s web application developers do not have the end-to-end visibility required to effectively build and maintain a reliable system. We argue that a new capability of the web application environment—the ability for a system to automatically create and serve different versions of an application to each user—can be exploited for adaptive, cross-user monitoring of the behavior of web applications on end-user desktops. In this paper, we propose a live monitoring framework for building a new class of development and maintenance techniques that use a continuous loop of automatic, adaptive application rewriting, observation and analysis. We outline two such adaptive techniques for localizing data corruption bugs and automatically placing function result caching. The live monitoring framework requires only minor changes to web application servers, no changes to application code and no modifications to existing browsers.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
AJAX-based web applications are enabling the next generation of rich, client-side web applications, but today’s web application developers do not have the end-to-end visibility required to effectively build and maintain a reliable system. We argue that a new capability of the web application environment—the ability for a system to automatically create and serve different versions of an application to each user—can be exploited for adaptive, cross-user monitoring of the behavior of web applications on end-user desktops. In this paper, we propose a live monitoring framework for building a new class of development and maintenance techniques that use a continuous loop of automatic, adaptive application rewriting, observation and analysis. We outline two such adaptive techniques for localizing data corruption bugs and automatically placing function result caching. The live monitoring framework requires only minor changes to web application servers, no changes to application code and no modifications to existing browsers. |
Moises Goldszmidt, Emre M Kiciman, David A Maltz, John C Platt Mining Web Logs to Debug Wide-Area Connectivity Problems Miscellaneous 2007, (US Patent App. 11/680,483). @misc{goldszmidt2007mining,
title = {Mining Web Logs to Debug Wide-Area Connectivity Problems},
author = {Moises Goldszmidt and Emre M Kiciman and David A Maltz and John C Platt},
year = {2007},
date = {2007-00-01},
note = {US Patent App. 11/680,483},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Mehmet Kiciman, Benjamin Livshits, Jiahe Helen Wang Remote monitoring of local behavior of network applications Miscellaneous 2007, (US Patent App. 11/862,074). @misc{kiciman2007remote,
title = {Remote monitoring of local behavior of network applications},
author = {Emre Mehmet Kiciman and Benjamin Livshits and Jiahe Helen Wang},
year = {2007},
date = {2007-00-01},
note = {US Patent App. 11/862,074},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
2006
|
Chad Verbowski, Emre Kıcıman, Arunvijay Kumar, Brad Daniels, Shan Lu, Juhan Lee, Yi-Min Wang, Roussi Roussev Flight data recorder: monitoring persistent-state interactions to improve systems management Proceedings Article In: Proceedings of the 7th Symp. on Operating Systems Design and Implementation (OSDI), pp. 117–130, USENIX 2006. @inproceedings{verbowski2006flight,
title = {Flight data recorder: monitoring persistent-state interactions to improve systems management},
author = {Chad Verbowski and Emre Kıcıman and Arunvijay Kumar and Brad Daniels and Shan Lu and Juhan Lee and Yi-Min Wang and Roussi Roussev},
url = {https://kiciman.org/wp-content/uploads/2017/08/FDR-OSDI-CR-FINAL.pdf},
year = {2006},
date = {2006-11-01},
booktitle = {Proceedings of the 7th Symp. on Operating Systems Design and Implementation (OSDI)},
pages = {117--130},
organization = {USENIX},
abstract = {Mismanagement of the persistent state of a system—all the executable files, configuration settings and other data that govern how a system functions—causes reliability problems, security vulnerabilities, and drives up operation costs. Recent research traces persistent state interactions—how state is read, modified, etc.—to help troubleshooting, change management and malware mitigation, but has been limited by the difficulty of collecting, storing, and analyzing the 10s to 100s of millions of daily events that occur on a single machine, much less the 1000s or more machines in many computing environments.
We present the Flight Data Recorder (FDR) that enables always-on tracing, storage and analysis of persistent state interactions. FDR uses a domain-specific log format, tailored to observed file system workloads and common systems management queries. Our lossless log format compresses logs to only 0.5-0.9 bytes per interaction. In this log format, 1000 machine-days of logs—over 25 billion events—can be analyzed in less than 30 minutes. We report on our deployment of FDR to 207 production machines at MSN, and show that a single centralized collection machine can potentially scale to collecting and analyzing the complete records of persistent state interactions from 4000+ machines. Furthermore, our tracing technology is shipping as part of the Windows Vista OS.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Mismanagement of the persistent state of a system—all the executable files, configuration settings and other data that govern how a system functions—causes reliability problems, security vulnerabilities, and drives up operation costs. Recent research traces persistent state interactions—how state is read, modified, etc.—to help troubleshooting, change management and malware mitigation, but has been limited by the difficulty of collecting, storing, and analyzing the 10s to 100s of millions of daily events that occur on a single machine, much less the 1000s or more machines in many computing environments.
We present the Flight Data Recorder (FDR) that enables always-on tracing, storage and analysis of persistent state interactions. FDR uses a domain-specific log format, tailored to observed file system workloads and common systems management queries. Our lossless log format compresses logs to only 0.5-0.9 bytes per interaction. In this log format, 1000 machine-days of logs—over 25 billion events—can be analyzed in less than 30 minutes. We report on our deployment of FDR to 207 production machines at MSN, and show that a single centralized collection machine can potentially scale to collecting and analyzing the complete records of persistent state interactions from 4000+ machines. Furthermore, our tracing technology is shipping as part of the Windows Vista OS. |
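A rough sketch of why such logs compress so well: persistent-state workloads repeat the same (process, operation, path) triples constantly, so interning each distinct triple once and logging only a small ID plus a timestamp delta drives the per-event cost toward a byte or two. FDR's real on-disk format is more elaborate than this toy encoder:

```python
# Hedged sketch: dictionary-interned, delta-encoded persistent-state event log.
def encode_events(events):
    """events: list of (timestamp, process, op, path). Returns (dictionary, packed bytes)."""
    table, packed, last_ts = {}, bytearray(), 0
    for ts, proc, op, path in events:
        key = (proc, op, path)
        idx = table.setdefault(key, len(table))
        packed.append(idx & 0xFF)              # 1-byte interaction id (toy: <= 256 distinct)
        packed.append(min(ts - last_ts, 255))  # 1-byte timestamp delta (toy encoding)
        last_ts = ts
    return table, bytes(packed)

events = [(i, "svchost.exe", "read", r"C:\config\app.ini") for i in range(1000)]
table, blob = encode_events(events)
print(len(blob) / len(events), "bytes per event after interning (excluding the small dictionary)")
```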
Emre Kıcıman, David A Maltz, Moises Goldszmidt, John C Platt Mining web logs to debug distant connectivity problems Proceedings Article In: Proceedings of the 2006 SIGCOMM Workshop on Mining Network Data (MineNet), pp. 287–292, ACM 2006. @inproceedings{kiciman2006mining,
title = {Mining web logs to debug distant connectivity problems},
author = {Emre Kıcıman and David A Maltz and Moises Goldszmidt and John C Platt},
url = {https://kiciman.org/wp-content/uploads/2017/08/MineNet-MiningWebLogs.pdf},
year = {2006},
date = {2006-09-15},
booktitle = {Proceedings of the 2006 SIGCOMM Workshop on Mining Network Data (MineNet)},
pages = {287--292},
organization = {ACM},
abstract = {Content providers base their business on their ability to receive and answer requests from clients distributed across the Internet. Since disruptions in the flow of these requests directly translate into lost revenue, there is tremendous incentive to diagnose why some requests fail and prod the responsible parties into corrective action. However, a content provider has only limited visibility into the state of the Internet outside its domain. Instead, it must mine failure diagnoses from available information sources to infer what is going wrong and who is responsible. Our ultimate goal is to help Internet content providers resolve reliability problems in the wide-area network that are affecting end-user-perceived reliability. We describe two algorithms that represent our first steps towards enabling content providers to extract actionable debugging information from content provider logs, and we present the results of applying the algorithms to a week’s worth of logs from a large content provider, during which time it handled over 1 billion requests originating from over 10 thousand ASes.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Content providers base their business on their ability to receive and answer requests from clients distributed across the Internet. Since disruptions in the flow of these requests directly translate into lost revenue, there is tremendous incentive to diagnose why some requests fail and prod the responsible parties into corrective action. However, a content provider has only limited visibility into the state of the Internet outside its domain. Instead, it must mine failure diagnoses from available information sources to infer what is going wrong and who is responsible. Our ultimate goal is to help Internet content providers resolve reliability problems in the wide-area network that are affecting end-user-perceived reliability. We describe two algorithms that represent our first steps towards enabling content providers to extract actionable debugging information from content provider logs, and we present the results of applying the algorithms to a week’s worth of logs from a large content provider, during which time it handled over 1 billion requests originating from over 10 thousand ASes. |
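In the spirit of the first step such algorithms might take, the sketch below aggregates request logs by client AS and flags ASes whose failure rate is anomalously high relative to the global rate, using a simple binomial z-score rather than the paper's actual method; the AS numbers and counts are invented:

```python
# Hedged sketch: flag ASes with unusually high request-failure rates.
import math
from collections import defaultdict

def suspicious_ases(log, z_threshold=3.0):
    """log: iterable of (as_number, succeeded: bool)."""
    stats = defaultdict(lambda: [0, 0])              # as -> [requests, failures]
    for asn, ok in log:
        stats[asn][0] += 1
        stats[asn][1] += 0 if ok else 1
    total = sum(n for n, _ in stats.values())
    fails = sum(f for _, f in stats.values())
    p = fails / total                                # global failure rate
    flagged = []
    for asn, (n, f) in stats.items():
        z = (f - n * p) / math.sqrt(n * p * (1 - p)) if 0 < p < 1 else 0.0
        if z > z_threshold:
            flagged.append((asn, f / n, z))
    return sorted(flagged, key=lambda x: -x[2])

log = [(7018, True)] * 990 + [(7018, False)] * 10 + [(4134, True)] * 80 + [(4134, False)] * 20
print(suspicious_ases(log))
```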
Richard Mortier, Emre Kıcıman Autonomic network management: some pragmatic considerations Proceedings Article In: Proceedings of the 2006 SIGCOMM Workshop on Internet Network Management, pp. 89–93, ACM 2006. @inproceedings{mortier2006autonomic,
title = {Autonomic network management: some pragmatic considerations},
author = {Richard Mortier and Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/inm06_camera_final.pdf},
year = {2006},
date = {2006-09-11},
booktitle = {Proceedings of the 2006 SIGCOMM Workshop on Internet Network Management},
pages = {89--93},
organization = {ACM},
abstract = {Autonomic Network Management (ANM) has the goal of increasing reliability and performance while reducing management cost using various automated techniques. These range from agent-based approaches relying on explicit models and ontologies to emergent techniques relying on gossip protocols, swarming algorithms or other biologically inspired work. In this paper, we review the failures, growing pains and successes of earlier techniques for automated and adaptive network control and management, from the simple control loops in TCP and OSPF to the more complicated emergent behaviors of BGP dynamics and overlay routing. From these examples we extract several lessons relevant to ongoing research in autonomic network management.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Autonomic Network Management (ANM) has the goal of increasing reliability and performance while reducing management cost using various automated techniques. These range from agent-based approaches relying on explicit models and ontologies to emergent techniques relying on gossip protocols, swarming algorithms or other biologically inspired work. In this paper, we review the failures, growing pains and successes of earlier techniques for automated and adaptive network control and management, from the simple control loops in TCP and OSPF to the more complicated emergent behaviors of BGP dynamics and overlay routing. From these examples we extract several lessons relevant to ongoing research in autonomic network management. |
Chad Verbowski, Emre Kıcıman, Brad Daniels, Yi-Min Wang, Roussi Roussev, Shan Lu, Juhan Lee Analyzing persistent state interactions to improve state management Proceedings Article In: ACM SIGMETRICS Performance Evaluation Review, pp. 363–364, ACM 2006. @inproceedings{verbowski2006analyzing,
title = {Analyzing persistent state interactions to improve state management},
author = {Chad Verbowski and Emre Kıcıman and Brad Daniels and Yi-Min Wang and Roussi Roussev and Shan Lu and Juhan Lee},
url = {https://kiciman.org/wp-content/uploads/2017/08/fdr-sigmetrics-short.pdf},
year = {2006},
date = {2006-06-01},
booktitle = {ACM SIGMETRICS Performance Evaluation Review},
volume = {34},
number = {1},
pages = {363--364},
organization = {ACM},
abstract = {A primary challenge to building reliable and secure computer systems is managing the persistent state (PS) of the system: all the executable files, configuration settings and other data that govern how a system functions. The difficulty comes from the sheer volume of this persistent state, the frequency of changes to it, and the variety of workloads and requirements that require customization of persistent state. The cost of not managing persistent state well is high: problems with persistent state are the leading cause of downtime at Internet services, troubleshooting configuration problems is a leading component of total cost of ownership in corporate environments, and malware (effectively, unwanted persistent state) is a serious privacy and security concern on personal computers.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
A primary challenge to building reliable and secure computer systems is managing the persistent state (PS) of the system: all the executable files, configuration settings and other data that govern how a system functions. The difficulty comes from the sheer volume of this persistent state, the frequency of changes to it, and the variety of workloads and requirements that require customization of persistent state. The cost of not managing persistent state well is high: problems with persistent state are the leading cause of downtime at Internet services, troubleshooting configuration problems is a leading component of total cost of ownership in corporate environments, and malware (effectively, unwanted persistent state) is a serious privacy and security concern on personal computers. |
George Candea, Emre Kıcıman, Shinichi Kawamoto, Armando Fox Autonomous recovery in componentized internet applications Journal Article In: Cluster Computing, vol. 9, no. 2, pp. 175–190, 2006, ISSN: 1573-7543. @article{candea2006autonomous,
title = {Autonomous recovery in componentized internet applications},
author = {George Candea and Emre Kıcıman and Shinichi Kawamoto and Armando Fox},
url = {https://link.springer.com/article/10.1007%2Fs10586-006-7562-4?LI=true},
doi = {10.1007/s10586-006-7562-4},
issn = {1573-7543},
year = {2006},
date = {2006-04-26},
journal = {Cluster Computing},
volume = {9},
number = {2},
pages = {175--190},
publisher = {Springer Netherlands},
abstract = {In this paper we show how to reduce downtime of J2EE applications by rapidly and automatically recovering from transient and intermittent software failures, without requiring application modifications. Our prototype combines three application-agnostic techniques: macroanalysis for fault detection and localization, microrebooting for rapid recovery, and external management of recovery actions. The individual techniques are autonomous and work across a wide range of componentized Internet applications, making them well-suited to the rapidly changing software of Internet services. The proposed framework has been integrated with JBoss, an open-source J2EE application server. Our prototype provides an execution platform that can automatically recover J2EE applications within seconds of the manifestation of a fault. Our system can provide a subset of a system's active end users with the illusion of continuous uptime, in spite of failures occurring behind the scenes, even when there is no functional redundancy in the system.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
In this paper we show how to reduce downtime of J2EE applications by rapidly and automatically recovering from transient and intermittent software failures, without requiring application modifications. Our prototype combines three application-agnostic techniques: macroanalysis for fault detection and localization, microrebooting for rapid recovery, and external management of recovery actions. The individual techniques are autonomous and work across a wide range of componentized Internet applications, making them well-suited to the rapidly changing software of Internet services. The proposed framework has been integrated with JBoss, an open-source J2EE application server. Our prototype provides an execution platform that can automatically recover J2EE applications within seconds of the manifestation of a fault. Our system can provide a subset of a system's active end users with the illusion of continuous uptime, in spite of failures occurring behind the scenes, even when there is no functional redundancy in the system. |
2005
|
Emre Kıcıman Using statistical monitoring to detect failures in internet services PhD Thesis Stanford University, 2005. @phdthesis{kiciman2005using,
title = {Using statistical monitoring to detect failures in internet services},
author = {Emre Kıcıman},
url = {https://kiciman.org/wp-content/uploads/2017/08/kiciman-thesis.pdf},
year = {2005},
date = {2005-09-01},
school = {Stanford University},
abstract = {Since the Internet’s popular emergence in the mid-1990’s, Internet services such as e-mail and messaging systems, search engines, e-commerce, news and financial sites, have become an important and often mission-critical part of our society. Unfortunately, managing these systems and keeping them running is a significant challenge. Their rapid rate of change as well as their size and complexity mean that the developers and operators of these services usually have only an incomplete idea of how the system works and even what it is supposed to do. This results in poor fault management, as operators have a hard time diagnosing faults and an even harder time detecting them. This dissertation argues that statistical monitoring, the use of statistical analysis and machine learning techniques to analyze live observations of a system’s behavior, can be an important tool in improving the manageability of Internet services. Statistical monitoring has several important features that are well suited to managing Internet services. First, the dynamic analysis of a system’s behavior in statistical monitoring means that there is no dependency on specifications or descriptions that might be stale or incorrect. Second, monitoring a live, deployed system gives insight into system behavior that cannot be achieved in QA or testing environments. Third, automatic analysis through statistical monitoring can better cope with larger and more complex systems, aiding human operators as well as automating parts of the system management process.
The first half of this thesis focuses on a methodology to detect failures in Internet services, including high-level application failures, by monitoring structural behaviors that reflect the high-level functionality of the service. We implemented prototype fault monitors for a testbed Internet service and a clustered hashtable system. We also present encouraging early results from applying these techniques to two real, large Internet services.
In the second half of this thesis, we apply statistical monitoring techniques to two other problems related to fault detection: automatically inferring undocumented system structure and invariants and localizing the potential cause of a failure given its symptoms. We apply the former to the Windows Registry, a large, poorly documented and error-prone configuration database used by the Windows operating system and Windows-based applications. We describe and evaluate the latter in the context of our testbed Internet service.
Our experiences provide strong support for statistical monitoring, and suggest that it may prove to be an important tool in improving the manageability and reliability of Internet services.},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Since the Internet’s popular emergence in the mid-1990’s, Internet services such as e-mail and messaging systems, search engines, e-commerce, news and financial sites, have become an important and often mission-critical part of our society. Unfortunately, managing these systems and keeping them running is a significant challenge. Their rapid rate of change as well as their size and complexity mean that the developers and operators of these services usually have only an incomplete idea of how the system works and even what it is supposed to do. This results in poor fault management, as operators have a hard time diagnosing faults and an even harder time detecting them. This dissertation argues that statistical monitoring, the use of statistical analysis and machine learning techniques to analyze live observations of a system’s behavior, can be an important tool in improving the manageability of Internet services. Statistical monitoring has several important features that are well suited to managing Internet services. First, the dynamic analysis of a system’s behavior in statistical monitoring means that there is no dependency on specifications or descriptions that might be stale or incorrect. Second, monitoring a live, deployed system gives insight into system behavior that cannot be achieved in QA or testing environments. Third, automatic analysis through statistical monitoring can better cope with larger and more complex systems, aiding human operators as well as automating parts of the system management process.
The first half of this thesis focuses on a methodology to detect failures in Internet services, including high-level application failures, by monitoring structural behaviors that reflect the high-level functionality of the service. We implemented prototype fault monitors for a testbed Internet service and a clustered hashtable system. We also present encouraging early results from applying these techniques to two real, large Internet services.
In the second half of this thesis, we apply statistical monitoring techniques to two other problems related to fault detection: automatically inferring undocumented system structure and invariants and localizing the potential cause of a failure given its symptoms. We apply the former to the Windows Registry, a large, poorly documented and error-prone configuration database used by the Windows operating system and Windows-based applications. We describe and evaluate the latter in the context of our testbed Internet service.
Our experiences provide strong support for statistical monitoring, and suggest that it may prove to be an important tool in improving the manageability and reliability of Internet services. |
Emre Kıcıman, Armando Fox Detecting application-level failures in component-based internet services Journal Article In: IEEE Transactions on Neural Networks, vol. 16, no. 5, pp. 1027–1041, 2005. @article{kiciman2005detecting,
title = {Detecting application-level failures in component-based internet services},
author = {Emre Kıcıman and Armando Fox},
url = {https://kiciman.org/wp-content/uploads/2017/08/anomaly-4.pdf},
year = {2005},
date = {2005-09-01},
journal = {IEEE Transactions on Neural Networks},
volume = {16},
number = {5},
pages = {1027--1041},
publisher = {IEEE},
abstract = {Most Internet services (e-commerce, search engines, etc.) suffer faults. Quickly detecting these faults can be the largest bottleneck in improving availability of the system. We present Pinpoint, a methodology for automatic fault detection in Internet services by (1) observing low-level, internal structural behaviors of the service; (2) modeling the majority behavior of the system as correct; and (3) detecting anomalies in these behaviors as possible symptoms of failures. Without requiring any a priori application-specific information, Pinpoint correctly detected 89-96% of major failures in our experiments, as compared to 20-70% detected by current application-generic techniques.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Most Internet services (e-commerce, search engines, etc.) suffer faults. Quickly detecting these faults can be the largest bottleneck in improving availability of the system. We present Pinpoint, a methodology for automatic fault detection in Internet services by (1) observing low-level, internal structural behaviors of the service; (2) modeling the majority behavior of the system as correct; and (3) detecting anomalies in these behaviors as possible symptoms of failures. Without requiring any a priori application-specific information, Pinpoint correctly detected 89-96% of major failures in our experiments, as compared to 20-70% detected by current application-generic techniques. |
Emre Kıcıman, Lakshminarayanan Subramanian Root cause localization in large scale systems Proceedings Article In: Proceedings of the First Workshop on Hot Topics in System Dependability (HotDep), 2005. @inproceedings{kiciman2005root,
title = {Root cause localization in large scale systems},
author = {Emre Kıcıman and Lakshminarayanan Subramanian},
url = {https://kiciman.org/wp-content/uploads/2017/08/rootcause-ws.pdf},
year = {2005},
date = {2005-06-01},
booktitle = {Proceedings of the First Workshop on Hot Topics in System Dependability (HotDep)},
abstract = {Root cause localization, the process of identifying the source of problems in a system using purely external observations, is a significant challenge in many large-scale systems. In this paper, we propose an abstract model that captures the common issues underlying root cause localization and hence provides the ability to leverage solutions across different systems.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Root cause localization, the process of identifying the source of problems in a system using purely external observations, is a significant challenge in many large-scale systems. In this paper, we propose an abstract model that captures the common issues underlying root cause localization and hence provides the ability to leverage solutions across different systems. |
2004
|
Benjamin C Ling, Emre Kıcıman, Armando Fox Session State: Beyond Soft State. Proceedings Article In: NSDI, pp. 22–22, USENIX 2004. @inproceedings{ling2004session,
title = {Session State: Beyond Soft State.},
author = {Benjamin C Ling and Emre Kıcıman and Armando Fox},
url = {https://kiciman.org/wp-content/uploads/2017/08/ling.pdf},
year = {2004},
date = {2004-03-29},
booktitle = {NSDI},
volume = {4},
pages = {22--22},
organization = {USENIX},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Mike Y. Chen, Anthony J Accardi, Emre Kıcıman, Jim Lloyd, David A Patterson, Armando Fox, Eric A Brewer Path-based failure and evolution management Proceedings Article In: Proceedings of the 1st USENIX/ACM Symposium on Networked Systems Design and Implementation (NSDI '04), ACM, 2004. @inproceedings{chen2004path,
title = {Path-based failure and evolution management},
author = {Mike Y. Chen and Anthony J Accardi and Emre Kıcıman and Jim Lloyd and David A Patterson and Armando Fox and Eric A Brewer},
url = {https://kiciman.org/wp-content/uploads/2017/08/paths-nsdi.pdf},
year = {2004},
date = {2004-03-01},
booktitle = {Proceedings of the 1st USENIX/ACM Symposium on Networked Systems Design and Implementation (NSDI '04)},
publisher = {ACM},
abstract = {We present a new approach to managing failures and evolution in large, complex distributed systems using runtime paths. We use the paths that requests follow as they move through the system as our core abstraction, and our “macro” approach focuses on component interactions rather than the details of the components themselves. Paths record component performance and interactions, are user- and request-centric, and occur in sufficient volume to enable statistical analysis, all in a way that is easily reusable across applications. Automated statistical analysis of multiple paths allows for the detection and diagnosis of complex failures and the assessment of evolution issues. In particular, our approach enables significantly stronger capabilities in failure detection, failure diagnosis, impact analysis, and understanding system evolution. We explore these capabilities with three real implementations, two of which service millions of requests per day. Our contributions include the approach; the maintainable, extensible, and reusable architecture; the various statistical analysis engines; and the discussion of our experience with a high-volume production service over several years.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
We present a new approach to managing failures and evolution in large, complex distributed systems using runtime paths. We use the paths that requests follow as they move through the system as our core abstraction, and our “macro” approach focuses on component interactions rather than the details of the components themselves. Paths record component performance and interactions, are user- and request-centric, and occur in sufficient volume to enable statistical analysis, all in a way that is easily reusable across applications. Automated statistical analysis of multiple paths allows for the detection and diagnosis of complex failures and the assessment of evolution issues. In particular, our approach enables significantly stronger capabilities in failure detection, failure diagnosis, impact analysis, and understanding system evolution. We explore these capabilities with three real implementations, two of which service millions of requests per day. Our contributions include the approach; the maintainable, extensible, and reusable architecture; the various statistical analysis engines; and the discussion of our experience with a high-volume production service over several years. |
Emre Kıcıman, Yi-Min Wang Discovering correctness constraints for self-management of system configuration Proceedings Article In: Proceedings of the International Conference on Autonomic Computing (ICAC), pp. 28–35, IEEE 2004. @inproceedings{kiciman2004discovering,
title = {Discovering correctness constraints for self-management of system configuration},
author = {Emre Kıcıman and Yi-Min Wang},
url = {https://kiciman.org/wp-content/uploads/2017/08/tr-2004-22.pdf},
year = {2004},
date = {2004-03-01},
booktitle = {Proceedings of the International Conference on Autonomic Computing (ICAC)},
pages = {28--35},
organization = {IEEE},
abstract = {Managing the configuration of computer systems today is a difficult task. Too easily, a computer user or administrator can make a simple mistake or lapse and misconfigure a system, causing instabilities, unexpected behavior, and general unreliability. Bugs in software that changes these configurations, such as installers, only worsen the situation. A self-managing configuration system should be continuously monitoring itself for invalid settings, preventing the bugs from harming the system. Unfortunately, while there are many constraints which can differentiate between valid and invalid settings, few of these constraints are explicitly written down, much less written down in a form usable by an automatic monitor. We propose an approach to automatically infer these correctness constraints based on samples of known good configurations. In this paper we present Glean, a system for analyzing the structure of configurations and automatically inferring four types of correctness constraints on that structure.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Managing the configuration of computer systems today is a difficult task. Too easily, a computer user or administrator can make a simple mistake or lapse and misconfigure a system, causing instabilities, unexpected behavior, and general unreliability. Bugs in software that changes these configurations, such as installers, only worsen the situation. A self-managing configuration system should be continuously monitoring itself for invalid settings, preventing the bugs from harming the system. Unfortunately, while there are many constraints which can differentiate between valid and invalid settings, few of these constraints are explicitly written down, much less written down in a form usable by an automatic monitor. We propose an approach to automatically infer these correctness constraints based on samples of known good configurations. In this paper we present Glean, a system for analyzing the structure of configurations and automatically inferring four types of correctness constraints on that structure. |
Armando Fox, Emre Kıcıman, David Patterson, Randy Katz, Michael Jordan, Ion Stoica Statistical Monitoring + Predictable Recovery = Self-* Proceedings Article In: Proceedings of the International Workshop on Self-* Properties in Complex Information Systems (SELF-STAR), 2004. @inproceedings{fox2004statistical,
title = {Statistical Monitoring + Predictable Recovery = Self-*},
author = {Armando Fox and Emre Kıcıman and David Patterson and Randy Katz and Michael Jordan and Ion Stoica},
year = {2004},
date = {2004-01-01},
booktitle = {Proceedings of the International Workshop on Self-* Properties in Complex Information Systems (SELF-STAR)},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Armando Fox, Emre Kıcıman, David Patterson Combining statistical monitoring and predictable recovery for self-management Proceedings Article In: Proceedings of the 1st ACM SIGSOFT Workshop on Self-Managed Systems, pp. 49–53, ACM 2004. @inproceedings{fox2004combining,
title = {Combining statistical monitoring and predictable recovery for self-management},
author = {Armando Fox and Emre Kıcıman and David Patterson},
year = {2004},
date = {2004-01-01},
booktitle = {Proceedings of the 1st ACM SIGSOFT Workshop on Self-Managed Systems},
pages = {49--53},
organization = {ACM},
abstract = {Complex distributed Internet services form the basis not only of e-commerce but increasingly of mission-critical network-based applications. What is new is that the workload and internal architecture of three-tier enterprise applications present the opportunity for a new approach to keeping them running in the face of many common recoverable failures. The core of the approach is anomaly detection and localization based on statistical machine learning techniques. Unlike previous approaches, we propose anomaly detection and pattern mining not only for operational statistics such as mean response time, but also for structural behaviors of the system—what parts of the system, in what combinations, are being exercised in response to different kinds of external stimuli. In addition, rather than building baseline models a priori, we extract them by observing the behavior of the system over a short period of time during normal operation. We explain the necessary underlying assumptions and why they can be realized by systems research, report on some early successes using the approach, describe benefits of the approach that make it competitive as a path toward self-managing systems, and outline some research challenges. Our hope is that this approach will enable “new science” in the design of self-managing systems by allowing the rapid and widespread application of statistical learning theory techniques (SLT) to problems of system dependability.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Complex distributed Internet services form the basis not only of e-commerce but increasingly of mission-critical network-based applications. What is new is that the workload and internal architecture of three-tier enterprise applications present the opportunity for a new approach to keeping them running in the face of many common recoverable failures. The core of the approach is anomaly detection and localization based on statistical machine learning techniques. Unlike previous approaches, we propose anomaly detection and pattern mining not only for operational statistics such as mean response time, but also for structural behaviors of the system—what parts of the system, in what combinations, are being exercised in response to different kinds of external stimuli. In addition, rather than building baseline models a priori, we extract them by observing the behavior of the system over a short period of time during normal operation. We explain the necessary underlying assumptions and why they can be realized by systems research, report on some early successes using the approach, describe benefits of the approach that make it competitive as a path toward self-managing systems, and outline some research challenges. Our hope is that this approach will enable “new science” in the design of self-managing systems by allowing the rapid and widespread application of statistical learning theory techniques (SLT) to problems of system dependability. |
2003
|
Shankar R Ponnekanti, Brad Johanson, Emre Kiciman, Armando Fox Portability, extensibility and robustness in iROS Proceedings Article In: Proceedings of the First IEEE International Conference on Pervasive Computing and Communications (PerCom 2003), pp. 11–19, IEEE 2003. @inproceedings{ponnekanti2003portability,
title = {Portability, extensibility and robustness in iROS},
author = {Shankar R Ponnekanti and Brad Johanson and Emre Kiciman and Armando Fox},
year = {2003},
date = {2003-01-01},
booktitle = {Proceedings of the First IEEE International Conference on Pervasive Computing and Communications (PerCom 2003)},
pages = {11--19},
organization = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
George Candea, Emre Kiciman, Steve Zhang, Pedram Keyani, Armando Fox JAGR: An autonomous self-recovering application server Proceedings Article In: Proceedings of the Autonomic Computing Workshop, pp. 168–177, IEEE 2003. @inproceedings{candea2003jagr,
title = {JAGR: An autonomous self-recovering application server},
author = {George Candea and Emre Kiciman and Steve Zhang and Pedram Keyani and Armando Fox},
year = {2003},
date = {2003-01-01},
booktitle = {Proceedings of the Autonomic Computing Workshop},
pages = {168--177},
organization = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Mike Chen, Emre Kiciman, Anthony Accardi, Armando Fox, Eric Brewer Using runtime paths for macro analysis Proceedings Article In: 9th Workshop on Hot Topics in Operating Systems, Kauai, HI, 2003. @inproceedings{chen2003using,
title = {Using runtime paths for macro analysis},
author = {Mike Chen and Emre Kiciman and Anthony Accardi and Armando Fox and Eric Brewer},
year = {2003},
date = {2003-01-01},
booktitle = {9th Workshop on Hot Topics in Operating Systems, Kauai, HI},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Emre Kiciman, Armando Fox Detecting and localizing anomalous behavior to discover failures in component-based internet services Journal Article In: 2003. @article{kiciman2003detecting,
title = {Detecting and localizing anomalous behavior to discover failures in component-based internet services},
author = {Emre K{i}c{i}man and Armando Fox},
year = {2003},
date = {2003-01-01},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Aaron Brown, Pete Broadwell, George Candea, Mike Chen, James Cutler, Armando Fox, Emre Kiciman, David Oppenheimer, Jonathan Traupman Recovery Oriented Computing (ROC) Journal Article In: 2003. @article{brown2003recovery,
title = {Recovery Oriented Computing (ROC)},
author = {Aaron Brown and Pete Broadwell and George Candea and Mike Chen and James Cutler and Armando Fox and Emre K{i}c{i}man and David Oppenheimer and Jonathan Traupman},
year = {2003},
date = {2003-01-01},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
2002
|
Edward Swierk, Emre Kiciman, Nathan C Williams, Takashi Fukushima, Hideki Yoshida, Vince Laviano, Mary Baker The Roma personal metadata service Journal Article In: Mobile Networks and Applications, vol. 7, no. 5, pp. 407–418, 2002. @article{swierk2002roma,
title = {The Roma personal metadata service},
author = {Edward Swierk and Emre Kiciman and Nathan C Williams and Takashi Fukushima and Hideki Yoshida and Vince Laviano and Mary Baker},
year = {2002},
date = {2002-01-01},
journal = {Mobile Networks and Applications},
volume = {7},
number = {5},
pages = {407--418},
publisher = {Springer-Verlag New York, Inc.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Shankar Ponnekanti, Brad Johanson, Emre Kiciman, Armando Fox Designing for Maintainability, Failure Resilience, and Evolvability in Ubiquitous Computing Software Journal Article In: Submission to Operating Systems Design and Implementation, 2002. @article{ponnekanti2002designing,
title = {Designing for Maintainability, Failure Resilience, and Evolvability in Ubiquitous Computing Software},
author = {Shankar Ponnekanti and Brad Johanson and Emre K{i}c{i}man and Armando Fox},
year = {2002},
date = {2002-01-01},
journal = {Submission to Operating Systems Design and Implementation},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Emre Kiciman Using Internet Services to Manage Massive Evolving Information for Ubiquitous Computing Systems Proceedings Article In: ADJUNCT PROCEEDINGS, pp. 71, 2002. @inproceedings{kiciman2002using,
title = {Using Internet Services to Manage Massive Evolving Information for Ubiquitous Computing Systems},
author = {Emre K{i}c{i}man},
year = {2002},
date = {2002-01-01},
booktitle = {ADJUNCT PROCEEDINGS},
pages = {71},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Mike Y Chen, Emre Kiciman, Eugene Fratkin, Armando Fox, Eric Brewer Pinpoint: Problem determination in large, dynamic internet services Proceedings Article In: Proceedings of the International Conference on Dependable Systems and Networks (DSN 2002), pp. 595–604, IEEE 2002. @inproceedings{chen2002pinpoint,
title = {Pinpoint: Problem determination in large, dynamic internet services},
author = {Mike Y Chen and Emre Kiciman and Eugene Fratkin and Armando Fox and Eric Brewer},
year = {2002},
date = {2002-01-01},
booktitle = {Proceedings of the International Conference on Dependable Systems and Networks (DSN 2002)},
pages = {595--604},
organization = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
David Patterson, Aaron Brown, Pete Broadwell, George Candea, Mike Chen, James Cutler, Patricia Enriquez, Armando Fox, Emre Kiciman, Matthew Merzbacher, others Recovery-oriented computing (ROC): Motivation, definition, techniques, and case studies Technical Report Technical Report UCB//CSD-02-1175, UC Berkeley Computer Science 2002. @techreport{patterson2002recovery,
title = {Recovery-oriented computing (ROC): Motivation, definition, techniques, and case studies},
author = {David Patterson and Aaron Brown and Pete Broadwell and George Candea and Mike Chen and James Cutler and Patricia Enriquez and Armando Fox and Emre Kiciman and Matthew Merzbacher and others},
year = {2002},
date = {2002-01-01},
institution = {Technical Report UCB//CSD-02-1175, UC Berkeley Computer Science},
keywords = {},
pubstate = {published},
tppubtype = {techreport}
}
|
Mike Y Chen, Emre Kiciman, Eric Brewer An online evolutionary approach to developing Internet services Proceedings Article In: Proceedings of the 10th workshop on ACM SIGOPS European workshop, pp. 161–164, ACM 2002. @inproceedings{chen2002online,
title = {An online evolutionary approach to developing Internet services},
author = {Mike Y Chen and Emre Kiciman and Eric Brewer},
year = {2002},
date = {2002-01-01},
booktitle = {Proceedings of the 10th workshop on ACM SIGOPS European workshop},
pages = {161--164},
organization = {ACM},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
2001
|
Emre Kiciman, Armando Fox Separation of concerns in networked service composition Proceedings Article In: Position Paper Workshop on Advanced Separation of Concerns in Software Engineering at ICSE 2001, 2001. @inproceedings{kiciman2001separation,
title = {Separation of concerns in networked service composition},
author = {Emre Kiciman and Armando Fox},
year = {2001},
date = {2001-01-01},
booktitle = {Position Paper Workshop on Advanced Separation of Concerns in Software Engineering at ICSE 2001},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Brad Johanson, Shankar Ponnekanti, Emre Kiciman, Caesar Sengupta, Armando Fox System support for interactive workspaces Journal Article In: 2001. @article{johanson2001system,
title = {System support for interactive workspaces},
author = {Brad Johanson and Shankar Ponnekanti and Emre Kiciman and Caesar Sengupta and Armando Fox},
year = {2001},
date = {2001-01-01},
publisher = {Citeseer},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
James Cutler, Charles Fraleigh, Devendra Jaisinghani, Dora Karali, Emre Kiciman LUDDIDE—Location and User Dependent Information Delivery Miscellaneous 2001. @misc{cutler2001luddide,
title = {LUDDIDE—Location and User Dependent Information Delivery},
author = {James Cutler and Charles Fraleigh and Devendra Jaisinghani and Dora Karali and Emre Kiciman},
year = {2001},
date = {2001-01-01},
publisher = {Stanford University},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
|
Emre Kiciman, Laurence Melloul, Armando Fox Position summary: towards zero-code service composition Proceedings Article In: Proceedings of the Eighth Workshop on Hot Topics in Operating Systems, pp. 172, IEEE 2001. @inproceedings{kiciman2001position,
title = {Position summary: towards zero-code service composition},
author = {Emre Kiciman and Laurence Melloul and Armando Fox},
year = {2001},
date = {2001-01-01},
booktitle = {Proceedings of the Eighth Workshop on Hot Topics in Operating Systems},
pages = {172},
organization = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
2000
|
Helen J Wang, Bhaskaran Raman, Chen-nee Chuah, Rahul Biswas, Ramakrishna Gummadi, Barbara Hohlt, Xia Hong, Emre Kiciman, Zhuoqing Mao, Jimmy S Shih, others ICEBERG: An Internet core network architecture for integrated communications Journal Article In: IEEE Personal Communications, vol. 7, no. 4, pp. 10–19, 2000. @article{wang2000iceberg,
title = {ICEBERG: An Internet core network architecture for integrated communications},
author = {Helen J Wang and Bhaskaran Raman and Chen-nee Chuah and Rahul Biswas and Ramakrishna Gummadi and Barbara Hohlt and Xia Hong and Emre Kiciman and Zhuoqing Mao and Jimmy S Shih and others},
year = {2000},
date = {2000-01-01},
journal = {IEEE Personal Communications},
volume = {7},
number = {4},
pages = {10--19},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
Emre Kiciman, Armando Fox Using dynamic mediation to integrate COTS entities in a ubiquitous computing environment Proceedings Article In: International Symposium on Handheld and Ubiquitous Computing, pp. 211–226, Springer, Berlin, Heidelberg 2000. @inproceedings{kiciman2000using,
title = {Using dynamic mediation to integrate COTS entities in a ubiquitous computing environment},
author = {Emre Kiciman and Armando Fox},
year = {2000},
date = {2000-01-01},
booktitle = {International Symposium on Handheld and Ubiquitous Computing},
pages = {211--226},
organization = {Springer, Berlin, Heidelberg},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|
Brad Johanson, Emre Kiciman, Armando Fox Moving Data and Interfaces in an Interactive Workspace Journal Article In: 2000. @article{johanson2000moving,
title = {Moving Data and Interfaces in an Interactive Workspace},
author = {Brad Johanson and Emre Kiciman and Armando Fox},
year = {2000},
date = {2000-01-01},
publisher = {Citeseer},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
1999
|
Anthony D Joseph, Barbara Hohlt, Randy Katz, Emre Kiciman System Support for Multi-Modal Information Access and Device Control Journal Article In: interfaces, vol. 6, no. 8, pp. 11, 1999. @article{joseph1999system,
title = {System Support for Multi-Modal Information Access and Device Control},
author = {Anthony D Joseph and Barbara Hohlt and Randy Katz and Emre Kiciman},
year = {1999},
date = {1999-01-01},
journal = {interfaces},
volume = {6},
number = {8},
pages = {11},
publisher = {Citeseer},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
|
R Katz, A Joseph, S Czerwinski, T Hodes, B Hohlt, E Kiciman, R Ludwig, S Mukkamalla, K Oden, A Ordonez, others A scalable service architecture for computer-telephony integration Proceedings Article In: conference, 1999. @inproceedings{katz1999scalable,
title = {A scalable service architecture for computer-telephony integration},
author = {R Katz and A Joseph and S Czerwinski and T Hodes and B Hohlt and E Kiciman and R Ludwig and S Mukkamalla and K Oden and A Ordonez and others},
year = {1999},
date = {1999-01-01},
booktitle = {conference},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
|