# University Housing Turnout


Cambridge is home to three universities (in order of total enrollment): Harvard, MIT, and Lesley.

In [None]:
# %matplotlib ipympl
from datetime import timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ma_voter_analysis.plotting import *
from ma_voter_analysis.processing import *
from ma_voter_analysis.university import *

In [None]:
# Figure size passed to the multi-panel charts in this notebook.
figsize = (16, 8)

# Election-year grids. The stop value (2023) is exclusive.
ALL_YEARS = np.arange(2011, 2023, 1)  # 2011, 2012, ..., 2022
MUNICIPAL_YEARS = np.arange(2011, 2023, 2)  # odd years 2011..2021
STATE_YEARS = np.arange(2012, 2023, 2)  # even years 2012..2022
NATIONAL_YEARS = np.arange(2012, 2023, 4)  # 2012, 2016, 2020

# Load the vote history and key every row by (year, voter id).
voters = pd.read_csv("../../data/cambridge/general-vote-history.csv", low_memory=False)
voters = voters.set_index(["year", "Voter ID Number"])

In [None]:
# Start every voter with an empty housing label.
# NOTE(review): `find_housing_idxs` presumably fills this column in for
# matching voters (later cells group by "univ_housing_name") -- confirm in
# ma_voter_analysis.university. If it does, the call order below may matter
# for addresses that belong to more than one housing set, so keep it stable.
voters["univ_housing_name"] = ""

# One lookup per housing set. Each result is indexed with ["all"] further
# down to select every voter in that set.
gsas_idx = find_housing_idxs(voters, gsas_dorms)
harvard_ugrad_idx = find_housing_idxs(voters, harvard_ugrad_houses)
hvd_law_idx = find_housing_idxs(voters, hvd_law_dorms)
harvard_housing_idx = find_housing_idxs(voters, harvard_housing)
harvard_1st_idx = find_housing_idxs(voters, harvard_1st_year)
mit_ugrad_idx = find_housing_idxs(voters, mit_dorms)
mit_grad_idx = find_housing_idxs(voters, mit_grad_housing)
lesley_housing_idx = find_housing_idxs(voters, lesley_housing)

## Harvard College
### Harvard First Year Houses

In [None]:
# Chart the voters who live in Harvard first-year housing.
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=harvard_1st_idx["all"],
    figsize=figsize,
)
fig.suptitle("Harvard 1st Year Housing")

# On the bottom-right axes, keep only the first word of each tick label.
ax = axs[-1, -1]
tick_positions = ax.get_xticks()
short_names = [tick.get_text().split()[0] for tick in ax.get_xticklabels()]
ax.set_xticks(tick_positions, labels=short_names)

plt.legend()
plt.tight_layout()

### Harvard Undergraduate Houses


In [None]:
# Chart the voters who live in the Harvard undergraduate houses.
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=harvard_ugrad_idx["all"],
    figsize=figsize,
)
fig.suptitle("Harvard Undergrad Houses turnout")

# On the bottom-right axes, keep only the first word of each tick label.
ax = axs[-1, -1]
tick_positions = ax.get_xticks()
short_names = [tick.get_text().split()[0] for tick in ax.get_xticklabels()]
ax.set_xticks(tick_positions, labels=short_names)

plt.legend()
plt.tight_layout()

### Harvard Grad Dorms

In [None]:
# Chart the voters who live in the Harvard GSAS dorms.
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=gsas_idx["all"],
    figsize=figsize,
)
fig.suptitle("Harvard GSAS Dorms Turnout")

# On the bottom-right axes, keep only the first word of each tick label.
ax = axs[-1, -1]
tick_positions = ax.get_xticks()
short_names = [tick.get_text().split()[0] for tick in ax.get_xticklabels()]
ax.set_xticks(tick_positions, labels=short_names)

plt.legend()
plt.tight_layout()

### Harvard University Housing


[Harvard University Housing](https://www.huhousing.harvard.edu/) is separate from the grad dorms and the undergrad house system. It rents out apartments to graduate students. A map of its properties can be found [here](https://osa.gse.harvard.edu/files/gse-osa/files/hu_housing_map.pdf).



In [None]:
# Chart the voters who live in Harvard University Housing properties.
hu_housing_selection = harvard_housing_idx["all"]
fig, axs = multi_year_bar(
    voters, university_housing_bar_chart, idx=hu_housing_selection, figsize=figsize
)
fig.suptitle("Harvard Grad Housing Turnout")

plt.legend()
plt.tight_layout()

### Harvard Law School Dorms


In [None]:
# Chart the voters who live in the Harvard Law School dorms.
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=hvd_law_idx["all"],
    figsize=figsize,
)
fig.suptitle("Harvard Law Dorms Turnout")

# On the bottom-right axes, keep only the first word of each tick label.
ax = axs[-1, -1]
tick_positions = ax.get_xticks()
short_names = [tick.get_text().split()[0] for tick in ax.get_xticklabels()]
ax.set_xticks(tick_positions, labels=short_names)

plt.legend()
plt.tight_layout()

### All Harvard Dorms/Housing

In [None]:
# %matplotlib ipympl

df = turnout_by_year_key(voters, "univ_housing_name")

# Grouping by (housing name, year) puts the housing name first in the index,
# so `df.loc[<building>]` yields that building's per-year rows. The original
# author used groupby because reorder_levels did not work out.
df = df.groupby(["univ_housing_name", "year"]).mean()

# One subplot row per Harvard housing category.
housing_sets = [
    harvard_1st_year,
    harvard_ugrad_houses,
    gsas_dorms,
    harvard_housing,
    hvd_law_dorms,
]
fig, axs = plt.subplots(len(housing_sets), 3, figsize=(16, 20))

for row, buildings in enumerate(housing_sets):
    registered_ax, voted_ax, turnout_ax = axs[row]

    for building in buildings:
        per_year = df.loc[building]
        short_name = building.split()[0]
        registered_ax.plot(per_year["registered"], "o--", label=short_name)
        voted_ax.plot(per_year["voted"], "o--", label=short_name)
        # Only the turnout panel shows a legend, so it gets the full name.
        turnout_ax.plot(per_year["turnout"] * 100, "o--", label=building)

    registered_ax.set_title("Registered")
    voted_ax.set_title("Voted")
    turnout_ax.set_title("Turnout %")
    # Place the legend just outside the right edge of the turnout panel.
    turnout_ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
    turnout_ax.set_ylim(0, 100)

plt.tight_layout()

## Harvard Undergraduate Total turnout

Because the Harvard undergraduate population lives almost entirely in the house system, we can assess the turnout of Harvard undergraduates as a whole based on the enrollment numbers available from the [Harvard Factbook](https://oira.harvard.edu/factbook/fact-book-enrollment/).

Unfortunately, the breakdown between international students and US citizens is currently only available for 2022/2023. Based on some very cursory googling, it seems that each year has 10-15% international enrollment, so for the following analysis we will assume that 12.5% of Harvard undergraduates are international.

The big takeaway here is that Harvard Undergraduate Registration numbers have steadily declined over the past decade, down to a paltry 13% of eligible Harvard undergraduates having registered to vote. However, due to fluctuations in enrollment size and turnout percent the number of Harvard undergraduates who vote has remained at the same (low) level. 


In [None]:
# Harvard College enrollment keyed by year (figures per the Harvard Factbook
# linked in the text above).
enrollment_by_year = {
    2013: 6671,
    2014: 6636,
    2015: 6634,
    2016: 6645,
    2017: 6699,
    2018: 6722,
    2019: 6716,
    2020: 5212,
    2021: 7095,
    2022: 7178,
}

# As a Series indexed by year it aligns with per-year frames on assignment.
hvd_college_enrollment = pd.Series(enrollment_by_year)

In [None]:
# Voters living in any Harvard College housing: first-year housing or the
# undergraduate houses. `.copy()` gives an independent frame, so the helper
# column added below never touches `voters`.
# (Use mit_ugrad_idx["all"] as the mask to repeat this analysis for MIT.)
all_hvd_ugrad = voters[harvard_1st_idx["all"] | harvard_ugrad_idx["all"]].copy()

# Each row is counted as one registration, so summing a column of ones gives
# the number of registered voters per year.
all_hvd_ugrad["registered"] = 1

# Select the two numeric columns *before* aggregating. Summing every column
# first would also try to add up name/address columns, which is wasted work
# and can fail on non-numeric data.
hvd_yearly_turnout = all_hvd_ugrad.groupby(level="year")[["voted", "registered"]].sum()

# Share of registered undergraduates who actually voted.
hvd_yearly_turnout["turnout"] = (
    hvd_yearly_turnout["voted"] / hvd_yearly_turnout["registered"]
)

# Aligned on the year index; years missing from the enrollment table become NaN.
hvd_yearly_turnout["enrollment"] = hvd_college_enrollment

# Assumed international share of enrollment (see the text above). These
# students are treated as not eligible to register.
assumed_international_share = 0.125
hvd_yearly_turnout["us-citizens"] = hvd_yearly_turnout["enrollment"] * (
    1 - assumed_international_share
)

# Registered voters as a fraction of the estimated eligible undergraduates.
hvd_yearly_turnout["registered-percent"] = (
    hvd_yearly_turnout["registered"] / hvd_yearly_turnout["us-citizens"]
)
hvd_yearly_turnout

In [None]:
# Registration rate and turnout rate over time, both as percentages.
plt.figure()
plt.title("Harvard Undergrads Registration and Voting Percentages")

registered_pct = hvd_yearly_turnout["registered-percent"] * 100
turnout_pct = hvd_yearly_turnout["turnout"] * 100

registered_pct.plot(marker="o", label="% of ugrads Registered")
turnout_pct.plot(marker="o", label="% Registered Ugrads who Voted")

plt.ylabel("%")
plt.legend()
plt.tight_layout()

In [None]:
# Raw head counts per year: registered voters and those who voted.
plt.figure()
plt.title("Harvard Undergrads Absolute Registration and Voters")

for column, legend_label in (("registered", "# Registered"), ("voted", "# Voted")):
    hvd_yearly_turnout[column].plot(marker="o", label=legend_label)

plt.ylabel("Number of People")
plt.legend()
plt.tight_layout()

## MIT
### MIT Undergraduate Dorms

In [None]:
# Chart the voters who live in MIT undergraduate dorms.
mit_ugrad_selection = mit_ugrad_idx["all"]
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=mit_ugrad_selection,
    figsize=figsize,
)
fig.suptitle("MIT Undergraduate Dorms")

plt.legend()
plt.tight_layout()

### MIT Graduate Housing

In [None]:
# Chart the voters who live in MIT graduate housing.
mit_grad_selection = mit_grad_idx["all"]
fig, axs = multi_year_bar(
    voters,
    university_housing_bar_chart,
    idx=mit_grad_selection,
    figsize=figsize,
)
fig.suptitle("MIT Graduate Housing")

plt.legend()
plt.tight_layout()

### All MIT Dorms/Housing

In [None]:
df = turnout_by_year_key(voters, "univ_housing_name")

# Grouping by (housing name, year) puts the housing name first in the index,
# so `df.loc[<building>]` yields that building's per-year rows. The original
# author used groupby because reorder_levels did not work out.
df = df.groupby(["univ_housing_name", "year"]).mean()

# One subplot row per MIT housing category: undergraduate, then graduate.
housing_sets = [mit_dorms, mit_grad_housing]
fig, axs = plt.subplots(len(housing_sets), 3, figsize=(16, 6))

for row, buildings in enumerate(housing_sets):
    registered_ax, voted_ax, turnout_ax = axs[row]

    for building in buildings:
        per_year = df.loc[building]
        short_name = building.split()[0]
        registered_ax.plot(per_year["registered"], "o--", label=short_name)
        voted_ax.plot(per_year["voted"], "o--", label=short_name)
        # Only the turnout panel shows a legend, so it gets the full name.
        turnout_ax.plot(per_year["turnout"] * 100, "o--", label=building)

    registered_ax.set_title("Registered")
    voted_ax.set_title("Voted")
    turnout_ax.set_title("Turnout %")
    # Place the legend just outside the right edge of the turnout panel.
    turnout_ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
    turnout_ax.set_ylim(0, 100)

plt.tight_layout()

### MIT Undergraduate Total turnout

```{admonition} TODO
This should be possible to calculate using the information from https://registrar.mit.edu/statistics-reports/enrollment-statistics-year, but it has yet to be implemented here.
```


## Lesley

We cannot calculate the total turnout of Lesley students because not all students live on campus (78% of first years do, per https://lesley.edu/life-at-lesley/campus-life/housing). For the same reason, we cannot determine the correct percentage of international students among those who live in the dorms.

### Lesley Undergraduates

In [None]:
# Chart the voters who live in Lesley University housing.
fig, axs = multi_year_bar(
    voters, university_housing_bar_chart, idx=lesley_housing_idx["all"], figsize=figsize
)
# Fixed: the title was copy/pasted from the MIT cell and mislabeled this
# Lesley figure as "MIT Undergraduate Dorms".
fig.suptitle("Lesley Undergraduate Housing")

# On the bottom-right axes, keep only the first word of each tick label.
ax = axs[-1, -1]
ax.set_xticks(
    ax.get_xticks(),
    labels=[tick.get_text().split()[0] for tick in ax.get_xticklabels()],
)
plt.legend()
plt.tight_layout()

In [None]:
df = turnout_by_year_key(voters, "univ_housing_name")

# Grouping by (housing name, year) puts the housing name first in the index,
# so `df.loc[<building>]` yields that building's per-year rows. The original
# author used groupby because reorder_levels did not work out.
df = df.groupby(["univ_housing_name", "year"]).mean()

# Only one housing set here, so `plt.subplots` returns a flat array of three axes.
housing_sets = [lesley_housing]
fig, axs = plt.subplots(len(housing_sets), 3, figsize=(16, 6))
registered_ax, voted_ax, turnout_ax = axs

for building in lesley_housing:
    per_year = df.loc[building]
    short_name = building.split()[0]
    registered_ax.plot(per_year["registered"], "o--", label=short_name)
    voted_ax.plot(per_year["voted"], "o--", label=short_name)
    # Only the turnout panel shows a legend, so it gets the full name.
    turnout_ax.plot(per_year["turnout"] * 100, "o--", label=building)

registered_ax.set_title("Registered")
voted_ax.set_title("Voted")
turnout_ax.set_title("Turnout %")
# Place the legend just outside the right edge of the turnout panel.
turnout_ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", borderaxespad=0.0)
turnout_ax.set_ylim(0, 100)

plt.tight_layout()