"""Electoral-college forecasting utilities driven by state polling data.

Some of the code here is based on that produced by the Princeton Election
Consortium, at election.princeton.edu/code
"""

import time
import urllib.error
import urllib.parse
import urllib.request

import numpy
import scipy
import scipy.special

try:
    import pylab
except ImportError:
    # Plotting is optional: only PlotEVMovie needs pylab (matplotlib), so the
    # numerical functions stay usable when it is not installed.
    pylab = None

# State abbreviations, in the order in which rows are read from the polls
# file for each date, and the electoral votes of each (same order).
states = ['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
          'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA',
          'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY',
          'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
          'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY']

EVs = [9, 3, 10, 6, 55, 9, 7, 3, 3, 27, 15, 4, 4, 21, 11, 7, 6, 8, 9, 4, 10,
       12, 17, 10, 6, 11, 3, 5, 5, 4, 15, 5, 31, 15, 3, 20, 7, 7, 21, 4, 8,
       3, 11, 34, 5, 3, 13, 11, 5, 10, 3]

# Maps state abbreviation -> number of electoral votes.
evotes = dict(zip(states, EVs))


def GetCurrentDate():
    """Return the current local date as its day-of-year integer."""
    return time.localtime().tm_yday


def _ParsePollLines(lines, cur_date):
    """Build the polls dictionary from an iterable of polls-file lines.

    Each non-blank line is whitespace-separated; field 2 is the
    Democrat-minus-Republican margin, field 3 the SEM and field 4 the date.
    Rows for one date are assigned to ``states`` by position.

    An (initially empty) entry for ``cur_date`` is always created, so it
    stays empty when the file holds no rows for that date.
    """
    polls = {cur_date: {}}
    count = -1
    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        dr_margin = float(fields[2])
        # The SEM is floored at 2.0, so no state has a smaller uncertainty.
        sem = max(float(fields[3]), 2.0)
        date = int(fields[4])
        if date != cur_date:
            # A new date starts a new block of rows: restart the state index.
            count = 0
            polls[date] = {}
            cur_date = date
        else:
            count += 1
        polls[date][states[count]] = (dr_margin, sem)
    return polls


def GetPrincetonPollingData():
    """Download and parse the Princeton Election Consortium polling medians.

    The file http://election.princeton.edu/code/matlab/polls.median.txt is
    fetched with urllib.request.urlretrieve() and parsed.

    Returns:
        A dictionary ``polls`` keyed by date (an integer like the one returned
        by GetCurrentDate()).  ``polls[date]`` is keyed by state abbreviation
        and holds a tuple ``(margin, sem)``: the polling margin
        (Democrat - Republican) and its standard error of the mean.
    """
    url = 'http://election.princeton.edu/code/matlab/polls.median.txt'
    pollsfile, _headers = urllib.request.urlretrieve(url)
    # open() replaces the Python 2 only file() builtin; the context manager
    # makes sure the downloaded file is closed again.
    with open(pollsfile) as handle:
        return _ParsePollLines(handle, GetCurrentDate())


def GetDemWinProbabilitiesFromPolls(polls, date):
    """Return the probability of a Democratic win in each state.

    Args:
        polls: polling dictionary as returned by GetPrincetonPollingData().
        date: integer date key into ``polls``.

    Returns:
        A dictionary mapping state abbreviation to the probability that the
        Democratic margin is positive, assuming a normal distribution with
        mean ``margin`` and standard deviation ``sem``.
    """
    sqrt2 = numpy.sqrt(2.0)
    dem_win_probs = {}
    for state, (margin, sem) in polls[date].items():
        zscore = margin / sem
        # Normal CDF evaluated at the z-score, written in terms of erf.
        dem_win_probs[state] = (scipy.special.erf(zscore / sqrt2) + 1.0) / 2.0
    return dem_win_probs


def SimulateElectionFromPolls(evotes, polls, date):
    """Simulate the electoral vote outcome of one random election.

    Args:
        evotes: dictionary mapping state abbreviation to electoral votes.
        polls: polling dictionary as returned by GetPrincetonPollingData().
        date: integer date key into ``polls``.

    Returns:
        A tuple ``(dem_wins, rep_wins, totals)`` where the first two items are
        lists of the states won by each candidate and ``totals`` is the tuple
        of (Democratic, Republican) electoral vote totals.
    """
    dem_wins = []
    rep_wins = []
    dem_win_probs = GetDemWinProbabilitiesFromPolls(polls, date)
    # Draw against the win probability of each state (not against the raw
    # (margin, sem) tuple stored in polls).
    for state, prob_dem_win in dem_win_probs.items():
        if numpy.random.random() < prob_dem_win:
            dem_wins.append(state)
        else:
            rep_wins.append(state)
    totals = (sum(evotes[state] for state in dem_wins),
              sum(evotes[state] for state in rep_wins))
    return dem_wins, rep_wins, totals


def ComputeExactEVDistribution(evotes, polls, date):
    """Compute the exact electoral vote probability distribution.

    Uses the "meta-analysis" convolution method described at
    http://election.princeton.edu/faq/ and in
    http://election.princeton.edu/code/matlab/EV_median.m

    Args:
        evotes: dictionary mapping state abbreviation to electoral votes.
        polls: polling dictionary as returned by GetPrincetonPollingData().
        date: integer date key into ``polls``.

    Returns:
        A 1-D array whose element ``k`` is the probability that the Democratic
        candidate receives exactly ``k`` electoral votes.

    Raises:
        KeyError: if a state in ``evotes`` has no polling data on ``date``.
    """
    prob = GetDemWinProbabilitiesFromPolls(polls, date)
    # Start from the identity element of convolution, so that any number of
    # states (including none) is handled uniformly.
    evdist = numpy.array([1.0])
    for state, ev in evotes.items():
        p = prob[state]
        # Index 0: the Democrat wins the state (Republican gains 0 votes);
        # index ev: the Republican wins all ev votes.
        state_dist = numpy.concatenate(([p], numpy.zeros(ev - 1), [1.0 - p]))
        evdist = numpy.convolve(evdist, state_dist)
    # The convolution is indexed by Republican votes; reverse it so that it is
    # indexed by Democratic votes.
    return evdist[::-1].copy()


def ComputeAllExactEVDistributions(evotes, polls):
    """Return a dictionary mapping each date to its exact EV distribution.

    Dates whose polling dictionary is empty (such as the placeholder entry
    for a current date without data) are skipped.
    """
    return {date: ComputeExactEVDistribution(evotes, polls, date)
            for date in polls if polls[date]}


def PlotEVMovie(evotes, polls):
    """Plot the exact EV distribution of every date, in date order.

    Raises:
        ImportError: if pylab (matplotlib) is not installed.
    """
    if pylab is None:
        raise ImportError('PlotEVMovie requires pylab (matplotlib)')
    allev = ComputeAllExactEVDistributions(evotes, polls)
    pylab.ion()
    for date in sorted(allev):
        pylab.plot(allev[date])
        pylab.draw()


def GetBaseStatesAndSwingStates(polls, date, threshold):
    """Split the states into base states and swing states.

    Args:
        polls: polling dictionary as returned by GetPrincetonPollingData().
        date: integer date key into ``polls``.
        threshold: margin (in percentage points) separating base from swing.

    Returns:
        A tuple ``(dem_base, rep_base, swing_states)`` of lists of
        ``(state, margin)`` pairs.  Base states have a margin strictly outside
        the threshold; every other state is a swing state.
    """
    dem_base = []
    rep_base = []
    swing_states = []
    for state, (margin, _sem) in polls[date].items():
        if margin > threshold:
            dem_base.append((state, margin))
        elif margin < -threshold:
            rep_base.append((state, margin))
        elif abs(margin) <= threshold:
            # The explicit test keeps NaN margins out of every list.
            swing_states.append((state, margin))
    return dem_base, rep_base, swing_states


def EnumerateAllScenarios(n):
    """Enumerate all elements of {-1,+1}**n.

    Returns:
        An integer array of shape ``(2**n, n)`` with one scenario per row, in
        which column ``i`` follows bit ``i`` of the row index; or None (after
        printing a warning) when ``n > 20``.
    """
    if n > 20:
        print("Warning: need to enumerate 2**%s=%s possible scenarios. "
              "This may take too much time and/or memory." % (n, 2**n))
        return None
    # Integer bit extraction: true division would yield fractional values
    # instead of 0/1 bits.
    row_index = numpy.arange(2**n)[:, numpy.newaxis]
    bits = (row_index >> numpy.arange(n)) & 1
    return bits * 2 - 1


def EnumerateOutcomes(evotes, polls, date, threshold=None):
    """Enumerate all possible electoral vote outcomes.

    Base states are all those with polling margins outside the threshold and
    are assigned to their leading candidate; all swing state scenarios are
    enumerated.

    Args:
        evotes: dictionary mapping state abbreviation to electoral votes.
        polls: polling dictionary as returned by GetPrincetonPollingData().
        date: integer date key into ``polls``.
        threshold: margin (in percentage points) separating base from swing.

    Returns:
        A list of ``(Democratic_total, Republican_total)`` tuples, one per
        swing state scenario, or None when no threshold is given or there are
        too many swing states to enumerate.
    """
    if threshold is None:
        print("Error: no swing states defined. Need to enumerate 2**51 "
              "possible scenarios. (Wouldn't be prudent...)")
        return None
    dbase, rbase, swing = GetBaseStatesAndSwingStates(polls, date, threshold)
    dbase_sum = sum(evotes[state] for state, spread in dbase)
    rbase_sum = sum(evotes[state] for state, spread in rbase)
    scenarios = EnumerateAllScenarios(len(swing))
    if scenarios is None:
        return None
    outcomes = []
    for scenario in scenarios:
        dtot = dbase_sum
        rtot = rbase_sum
        for (state, _spread), winner in zip(swing, scenario):
            # -1 gives the swing state to the Democrat, +1 to the Republican.
            if winner < 0:
                dtot += evotes[state]
            else:
                rtot += evotes[state]
        outcomes.append((dtot, rtot))
    return outcomes


def demo():
    """Download the polls and run every analysis for the current date."""
    date = GetCurrentDate()
    polls = GetPrincetonPollingData()
    ensemble = [SimulateElectionFromPolls(evotes, polls, date)
                for _ in range(10000)]
    dtot_ensemble = [e[2][0] for e in ensemble]
    exact_dist = ComputeExactEVDistribution(evotes, polls, date)
    ev_outcomes = EnumerateOutcomes(evotes, polls, date, threshold=7)
    return date, polls, ensemble, dtot_ensemble, exact_dist, ev_outcomes


if __name__ == '__main__':
    date, polls, ensemble, dtot_ensemble, exact_dist, ev_outcomes = demo()

# Copyright (C) Cornell University
# All rights reserved.
# Apache License, Version 2.0