% Compactness: R package for measuring legislative district compactness.
% The abstract has been cleaned of "\ " control-space export artifacts.
@software{637981,
  author   = {Aaron Kaufman and Gary King and Mayya Komisarchik},
  title    = {Compactness: An R Package for Measuring Legislative District Compactness If You Only Know it When You See It},
  year     = {2018},
  url      = {https://github.com/aaronrkaufman/compactness},
  abstract = {This software implements the method described in Aaron Kaufman, Gary King, and Mayya Komisarchik. Forthcoming. {\textquotedblleft}How to Measure Legislative District Compactness If You Only Know it When You See It.{\textquotedblright} American Journal of Political Science. Copy at http://j.mp/2u9OWrG
Our paper abstract: To deter gerrymandering, many state constitutions require legislative districts to be "compact." Yet, the law offers few precise definitions other than "you know it when you see it," which effectively implies a common understanding of the concept. In contrast, academics have shown that compactness has multiple dimensions and have generated many conflicting measures. We hypothesize that both are correct -- that compactness is complex and multidimensional, but a common understanding exists across people. We develop a survey to elicit this understanding, with high reliability (in data where the standard paired comparisons approach fails). We create a statistical model that predicts, with high accuracy, solely from the geometric features of the district, compactness evaluations by judges and public officials responsible for redistricting, among others. We also offer compactness data from our validated measure for 20,160 state legislative and congressional districts, as well as software to compute this measure from any district.},
}
% Readme2: R package for estimating category proportions in unlabeled documents.
% Abstract fixes: "selecitng" -> "selecting", missing space before "(Here",
% and "\ " control-space export artifacts replaced by plain spaces.
@software{637980,
  author   = {Connor T. Jerzak and Gary King and Anton Strezhnev},
  title    = {Readme2: An R Package for Improved Automated Nonparametric Content Analysis for Social Science},
  year     = {2018},
  url      = {https://github.com/iqss-research/readme-software},
  abstract = {An R package for estimating category proportions in an unlabeled set of documents given a labeled set, by implementing the method described in Jerzak, King, and Strezhnev (2019). This method is meant to improve on the ideas in Hopkins and King (2010), which introduced a quantification algorithm to estimate category proportions without directly classifying individual observations. This version of the software refines the original method by implementing a technique for selecting optimal textual features in order to minimize the error of the estimated category proportions. Automatic differentiation, stochastic gradient descent, and batch re-normalization are used to carry out the optimization. Other pre-processing functions are available, as well as an interface to the earlier version of the algorithm for comparison. The package also provides users with the ability to extract the generated features for use in other tasks. (Here{\textquoteright}s the abstract from our paper: Computer scientists and statisticians are often interested in classifying textual documents into chosen categories. Social scientists and others are often less interested in any one document and instead try to estimate the proportion falling in each category. The two existing types of techniques for estimating these category proportions are parametric "classify and count" methods and "direct" nonparametric estimation of category proportions without an individual classification step. Unfortunately, classify and count methods can sometimes be highly model dependent or generate more bias in the proportions even as the percent correctly classified increases. Direct estimation avoids these problems, but can suffer when the meaning and usage of language is too similar across categories or too different between training and test sets.
We develop an improved direct estimation approach without these problems by introducing continuously valued text features optimized for this problem, along with a form of matching adapted from the causal inference literature. We evaluate our approach in analyses of a diverse collection of 73 data sets, showing that it substantially improves performance compared to existing approaches. As a companion to this paper, we offer easy-to-use software that implements all ideas discussed herein.)},
}
% MatchingFrontier: R package for the balance-sample size frontier in matching.
% Abstract cleaned of "\ " control spaces and dangling end-of-line backslashes.
% The url field is taken from the "See ..." pointer at the end of the abstract.
@software{201796,
  author   = {Gary King and Christopher Lucas and Richard Nielsen},
  title    = {MatchingFrontier: R Package for Calculating the Balance-Sample Size Frontier},
  year     = {2014},
  url      = {http://projects.iq.harvard.edu/frontier/},
  abstract = {MatchingFrontier is an easy-to-use R Package for making optimal causal inferences from observational data. Despite their popularity, existing matching approaches leave researchers with two fundamental tensions. First, they are designed to maximize one metric (such as propensity score or Mahalanobis distance) but are judged against another for which they were not designed (such as L1 or differences in means). Second, they lack a principled solution to revealing the implicit bias-variance trade off: matching methods need to optimize with respect to both imbalance (between the treated and control groups) and the number of observations pruned, but existing approaches optimize with respect to only one; users then either ignore the other, or tweak it, usually suboptimally, by hand.
MatchingFrontier resolves both tensions by consolidating previous techniques into a single, optimal, and flexible approach. It calculates the matching solution with maximum balance for each possible sample size (N, N-1, N-2,...). It thus directly calculates the entire balance-sample size frontier, from which the user can easily choose one, several, or all subsamples from which to conduct their final analysis, given their own choice of imbalance metric and quantity of interest. MatchingFrontier solves the joint optimization problem in one run, automatically, without manual tweaking, and without iteration. Although for each subset size k, there exist a huge (N choose k) number of unique subsets, MatchingFrontier includes specially designed fast algorithms that give the optimal answer, usually in a few minutes.
MatchingFrontier implements the methods in this paper:
King, Gary, Christopher Lucas, and Richard Nielsen. 2014. The Balance-Sample Size Frontier in Matching Methods for Causal Inference, copy at http://j.mp/1dRDMrE
See http://projects.iq.harvard.edu/frontier/},
}
% AutoCast: automated Bayesian forecasting built on YourCast.
@software{18106,
  author = {Jonathan Bischof and Gary King and Samir Soneji},
  title  = {AutoCast: Automated Bayesian Forecasting with YourCast},
  year   = {2011},
  url    = {http://gking.harvard.edu/software/autocast-automated-bayesian-forecasting-yourcast},
}
% JudgeIt II: evaluation of electoral systems and redistricting plans.
% Abstract cleaned: removed scraped navigation text ("Track JudgeIt Changes")
% and the leading/trailing newlines around the description.
@software{GelKinTho94,
  author   = {Andrew Gelman and Gary King and Andrew Thomas},
  title    = {JudgeIt II: A Program for Evaluating Electoral Systems and Redistricting Plans},
  year     = {2010},
  url      = {http://gking.harvard.edu/judgeit},
  abstract = {A program for analyzing most any feature of district-level legislative elections data, including prediction, evaluating redistricting plans, estimating counterfactual hypotheses (such as what would happen if a term-limitation amendment were imposed). This implements statistical procedures described in a series of journal articles and has been used during redistricting in many states by judges, partisans, governments, private citizens, and many others. The earlier version was winner of the APSA Research Software Award.},
}
% ReadMe: software for automated content analysis of text documents.
@software{Kin,
  author   = {Gary King and Matthew Knowles and Steven Melendez},
  title    = {ReadMe: Software for Automated Content Analysis},
  year     = {2010},
  url      = {http://gking.harvard.edu/readme},
  abstract = {This program will read and analyze a large set of text documents and report on the proportion of documents in each of a set of given categories. },
}
% AMELIA II: multiple imputation program for missing data.
@software{HonKinBLa09,
  author   = {James Honaker and Gary King and Matthew Blackwell},
  title    = {AMELIA II: A Program for Missing Data},
  year     = {2009},
  url      = {http://gking.harvard.edu/amelia},
  abstract = {This program multiply imputes missing data in cross-sectional, time series, and time series cross-sectional data sets. It includes a Windows version (no knowledge of R required), and a version that works with R either from the command line or via a GUI.},
}
% CEM: coarsened exact matching software.
@software{18110,
  author = {Stefano Iacus and Gary King and Giuseppe Porro},
  title  = {CEM: Coarsened Exact Matching Software},
  year   = {2009},
  url    = {http://gking.harvard.edu/cem/},
}
% VA: verbal autopsies software.
@software{18116,
  author = {Gary King and Ying Lu},
  title  = {VA: Verbal Autopsies},
  year   = {2008},
  url    = {http://gking.harvard.edu/va},
}
% Anchors: software for anchoring vignettes data.
% NOTE(review): first author's given name corrected from "Johnathan" to
% "Jonathan" -- confirm against the package's published author list.
@software{18060,
  author = {Jonathan Wand and Gary King and Olivia Lau},
  title  = {Anchors: Software for Anchoring Vignettes Data},
  year   = {2007},
  url    = {http://gking.harvard.edu/anchors},
}
% MatchIt: nonparametric preprocessing for parametric causal inference.
% NOTE(review): the author list previously contained "Gary King" twice. The
% first slot is restored to Daniel E. Ho, following the published MatchIt
% author list (Ho, Imai, King, Stuart) -- confirm.
@software{18058,
  author = {Daniel E. Ho and Kosuke Imai and Gary King and Elizabeth A. Stuart},
  title  = {MatchIt: Nonparametric Preprocessing for Parametric Causal Inference},
  year   = {2007},
  url    = {http://gking.harvard.edu/matchit},
}
% Zelig: general-purpose statistical software.
@software{ImaKinLau06,
  author = {Kosuke Imai and Gary King and Olivia Lau},
  title  = {Zelig: Everyone{\textquoteright}s Statistical Software},
  year   = {2006},
  url    = {http://gking.harvard.edu/zelig},
}
% WhatIf: software for evaluating counterfactuals.
@software{18119,
  author = {Heather Stoll and Gary King and Langche Zeng},
  title  = {WhatIf: Software for Evaluating Counterfactuals},
  year   = {2005},
  url    = {http://gking.harvard.edu/whatif},
}
% YourCast: forecasting software that runs sets of linear regressions together.
% NOTE(review): first author's given name corrected from "Frederico" to
% "Federico" -- confirm against the published author list.
@software{6877,
  author   = {Federico Girosi and Gary King},
  title    = {YourCast},
  year     = {2004},
  url      = {http://gking.harvard.edu/yourcast},
  abstract = {YourCast is (open source and free) software that makes forecasts by running sets of linear regressions together in a variety of sophisticated ways. YourCast avoids the bias that results when stacking datasets from separate cross-sections and assuming constant parameters, and the inefficiency that results from running independent regressions in each cross-section.},
}
% CLARIFY: Stata macros for interpreting and presenting statistical results.
% Abstract fixes: missing space after "command line.", "\ " control-space
% artifacts, and a duplicated link-target URL that had been pasted inside the
% quoted Stata install command.
@software{TomWitKin03,
  author   = {Michael Tomz and Jason Wittenberg and Gary King},
  title    = {CLARIFY: Software for Interpreting and Presenting Statistical Results},
  journal  = {Journal of Statistical Software},
  volume   = {8},
  year     = {2003},
  abstract = {This is a set of easy-to-use Stata macros that implement the techniques described in Gary King, Michael Tomz, and Jason Wittenberg{\textquoteright}s "Making the Most of Statistical Analyses: Improving Interpretation and Presentation". To install Clarify, type "net from https://gking.harvard.edu/clarify" at the Stata command line. Winner of the Okidata Best Research Software Award. Also try -ssc install qsim- to install a wrapper, donated by Fred Wolfe, to automate Clarify{\textquoteright}s simulation of dummy variables.},
}
% EI: program for ecological inference.
@software{18121,
  author = {Gary King},
  title  = {EI: A Program for Ecological Inference},
  year   = {2003},
  url    = {http://gking.harvard.edu/ei},
}
% EzI: an easy program for ecological inference.
@software{King04,
  author = {Gary King and Kenneth Benoit},
  title  = {EzI: A(n Easy) Program for Ecological Inference},
  year   = {2003},
  url    = {http://gking.harvard.edu/EzI},
}
% ReLogit: rare events logistic regression.
@software{18113,
  author = {Gary King and Michael Tomz and Langche Zeng},
  title  = {ReLogit: Rare Events Logistic Regression},
  year   = {2003},
  url    = {http://gking.harvard.edu/relogit},
}
% COUNT: program for event count and duration regressions.
@software{Kin02,
  author   = {Gary King},
  title    = {COUNT: A Program for Estimating Event Count and Duration Regressions},
  year     = {2002},
  note     = {Versions 1988-2002, published as a stand-alone program and as part of the Gauss Package by Aptech Systems, Kent, Washington.},
  abstract = {A stand-alone, easy-to-use program for running event count and duration regression models, developed by and/or discussed in a series of journal articles by me. (Event count models have a dependent variable measured as the number of times something happens, such as the number of uncontested seats per state or the number of wars per year. Duration models explain dependent variables measured as the time until some event, such as the number of months a parliamentary cabinet endures.) Winner of the APSA Research Software Award.},
}
% AMELIA (original release): program for missing data.
% The stray period after the last author was removed; inside the author field
% it would be parsed as part of the surname.
@software{HonJosKin98,
  author = {James Honaker and Anne Joseph and Gary King and Kenneth Scheve and Naunihal Singh},
  title  = {AMELIA: A Program for Missing Data},
  year   = {1998},
  url    = {http://gking.harvard.edu/amelia},
}
% MAXLIK: Gauss programs and datasets for maximum likelihood-based models.
@software{King98,
  author   = {Gary King},
  title    = {MAXLIK},
  year     = {1998},
  abstract = {A set of Gauss programs and datasets (annotated for pedagogical purposes) to implement many of the maximum likelihood-based models I discuss in Unifying Political Methodology: The Likelihood Theory of Statistical Inference, Ann Arbor: University of Michigan Press, 1998, and use in my class. All datasets are real, not simulated.},
}
% JudgeIt I: evaluation of electoral systems and redistricting plans.
@software{GelKin92,
  author   = {Andrew Gelman and Gary King},
  title    = {JudgeIt I: A Program for Evaluating Electoral Systems and Redistricting Plans},
  year     = {1992},
  abstract = {A program for analyzing almost any feature of district-level legislative elections data, including prediction, evaluating redistricting plans, estimating counterfactual hypotheses (such as what would happen if a term-limitation amendment were imposed), and others. This implements statistical procedures described in a series of journal articles and has been used during redistricting in many states by judges, partisans, governments, private citizens, and many others. Winner of the APSA Research Software Award. },
}