Green Space and Performance

Explore the relationship between access to green/blue spaces and several health-related measures (performance, stress, energy, sleep quality, and time spent outdoors).

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder
# Load the panel-study CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
health = pd.read_csv(
    filepath_or_buffer='../data/CitieSHealth_BCN_DATA_PanelStudy_20220414.csv'
)
health.head(n=5)
ID_Zenodo date_all year month day dayoftheweek hour mentalhealth_survey occurrence_mental bienestar ... education covid_work covid_mood covid_sleep covid_espacios covid_aire covid_motor covid_electric covid_bikewalk covid_public_trans
0 71 22190 2020 10 2 4 18 Yes 1.0 9.0 ... Universitario Ha empeorado mucho Ha empeorado mucho Ha empeorado un poco Le doy más importancia que antes Le doy más importancia que antes Lo utilizo igual que antes Lo utilizo igual que antes Lo utilizo más que antes Lo utilizo igual que antes
1 112 22202 2020 10 14 2 22 Yes 2.0 8.0 ... Universitario Ha empeorado mucho Ha empeorado un poco No ha cambiado Le doy más importancia que antes Le doy más importancia que antes Lo utilizo igual que antes NaN Lo utilizo más que antes Lo utilizo menos que antes
2 110 22217 2020 10 29 3 18 Yes 10.0 9.0 ... Universitario Ha empeorado mucho No ha cambiado No ha cambiado Le doy más importancia que antes Le doy más importancia que antes Lo utilizo igual que antes Lo utilizo más que antes Lo utilizo más que antes Lo utilizo menos que antes
3 115 22222 2020 11 3 1 18 Yes 14.0 3.0 ... Universitario Ha empeorado mucho Ha empeorado un poco Ha empeorado un poco Le doy más importancia que antes Le doy más importancia que antes Lo utilizo igual que antes Lo utilizo igual que antes Lo utilizo igual que antes Lo utilizo menos que antes
4 135 22231 2020 11 12 3 22 Yes 12.0 9.0 ... Universitario Ha empeorado mucho Ha empeorado un poco Ha empeorado un poco Le doy más importancia que antes No ha cambiado NaN NaN Lo utilizo menos que antes Lo utilizo más que antes

5 rows × 95 columns

# Columns we are interested in: the green/blue-space access flag plus the
# measures explored below.  (dieta, alcohol, drogas, enfermo, smoke and psycho
# are currently left out of the selection.)
variables = [
    'access_greenbluespaces_300mbuff',
    'performance',
    'estres',
    'energia',
    'sueno',
    'horasfuera',
]

# Raw column name -> English label.  Keys that are not among the selected
# columns are simply ignored by `rename`.
# NOTE: 'time _outdoors' contains a space; the histogram cell further down
# refers to the column by this exact label.
renamed_variables = {
    'access_greenbluespaces_300mbuff': 'access_greenblue_spaces',
    'estres': 'stress_level',
    'energia': 'energy_level',
    'sueno': 'sleep_quality',
    'horasfuera': 'time _outdoors',
    'dieta': 'diet',
    'drogas': 'drug_use',
    'enfermo': 'illness',
}

# Keep only the selected columns, apply the English labels, and drop every
# row that has a missing value in any of them.
health_clean = (
    health[variables]
    .rename(columns=renamed_variables)
    .dropna()
)

health_clean.head()
access_greenblue_spaces performance stress_level energy_level sleep_quality time _outdoors
1 Yes 51.240993 5.0 9.0 8.0 5.0
2 Yes 56.006067 8.0 5.0 9.0 8.0
3 Yes 58.177117 5.0 2.0 2.0 5.0
4 Yes 71.481757 6.0 9.0 2.0 22.0
6 No 46.047583 5.0 5.0 10.0 3.0
def ohe(data, column):
    """One-hot encode the given column(s) of ``data``.

    Args:
        data: DataFrame containing the column(s) to encode.
        column: A single column label (str) or a list of column labels.

    Returns:
        A DataFrame of integer 0/1 indicator columns, named by
        ``OneHotEncoder.get_feature_names_out()`` and carrying the same
        index as ``data`` so it can be concatenated back column-wise.
    """
    # OneHotEncoder requires 2-D input. Selecting with a bare string would
    # yield a 1-D Series, so wrap a single label into a list first.
    columns = [column] if isinstance(column, str) else column

    enc = OneHotEncoder()
    # The encoder returns a sparse matrix by default; densify it and cast
    # the float indicators to int.
    indicators = enc.fit_transform(data[columns]).toarray().astype(int)

    return pd.DataFrame(
        indicators,
        columns=enc.get_feature_names_out(),
        index=data.index,
    )

# Split the columns by dtype.  `select_dtypes` is used instead of combining
# Index objects with `|` / `&`: those operators are deprecated as set
# operations (pandas emits a FutureWarning) and will become element-wise
# logical operations.  The original column order is preserved.
numerical = list(health_clean.select_dtypes(include=['float64', 'int64']).columns)
categorical = list(health_clean.select_dtypes(exclude=['float64', 'int64']).columns)

# Numeric columns as-is, followed by the one-hot indicators of the
# categorical columns (aligned on the shared row index).
one_hot_health_clean = pd.concat([health_clean[numerical], ohe(health_clean[categorical], categorical)], axis=1)
one_hot_health_clean.head()
/tmp/ipykernel_9643/101148290.py:9: FutureWarning: Index.__or__ operating as a set operation is deprecated, in the future this will be a logical operation matching Series.__or__.  Use index.union(other) instead.
  numerical = list((health_clean.dtypes[health_clean.dtypes == 'float64'].index) | (health_clean.dtypes[health_clean.dtypes == 'int64'].index))
/tmp/ipykernel_9643/101148290.py:10: FutureWarning: Index.__and__ operating as a set operation is deprecated, in the future this will be a logical operation matching Series.__and__.  Use index.intersection(other) instead.
  categorical = list((health_clean.dtypes[health_clean.dtypes != 'float64'].index) & (health_clean.dtypes[health_clean.dtypes != 'int64'].index))
performance stress_level energy_level sleep_quality time _outdoors access_greenblue_spaces_No access_greenblue_spaces_Yes
1 51.240993 5.0 9.0 8.0 5.0 0 1
2 56.006067 8.0 5.0 9.0 8.0 0 1
3 58.177117 5.0 2.0 2.0 5.0 0 1
4 71.481757 6.0 9.0 2.0 22.0 0 1
6 46.047583 5.0 5.0 10.0 3.0 1 0
# Pairwise relationships between the numeric measures, coloured by whether
# the respondent has green/blue-space access.
# The "_No" indicator is dropped before plotting: it is the exact complement
# of the "_Yes" column used as hue, so keeping it only adds a degenerate
# row/column to the grid.
sns.pairplot(
    one_hot_health_clean.drop(columns="access_greenblue_spaces_No"),
    hue="access_greenblue_spaces_Yes",
)
plt.savefig('../outputs/relation_btw_health_and_green')
../_images/95ccc50cd14c8e198deaaa020a700b0fd4aefa59901a1db5df12d2a999f41820.png
# Distribution of the performance score for each access group.
sns.violinplot(
    x='access_greenblue_spaces',
    y='performance',
    data=health_clean,
)
plt.savefig('../outputs/performance vs green_access')
../_images/7f3eb014d69d48fdbbcb66d667f95a359a21ae12356d221652593db2ee57ae51.png
# Density histogram of stress level with a KDE overlay, one series per access
# group.  common_norm=False normalises each group independently.
sns.histplot(
    health_clean,
    x='stress_level',
    hue='access_greenblue_spaces',
    stat="density",
    common_norm=False,
    kde=True,
)
plt.savefig('../outputs/hist_of_stress_level')
../_images/696193c20de47b9bba9a5329ba55f1c297b5c4ff21d5783e3966dab16773bcca.png
# Density histogram of energy level with a KDE overlay, one series per access
# group.  common_norm=False normalises each group independently.
sns.histplot(
    health_clean,
    x='energy_level',
    hue='access_greenblue_spaces',
    stat="density",
    common_norm=False,
    kde=True,
)
plt.savefig('../outputs/hist_of_energy_level')
../_images/b83e92f5e2c261e6c367e3a5fe4a8cd4de4cbcfddd44f00542f199b9fc5152c8.png
# Density histogram of sleep quality with a KDE overlay, one series per access
# group.  common_norm=False normalises each group independently.
sns.histplot(
    health_clean,
    x='sleep_quality',
    hue='access_greenblue_spaces',
    stat="density",
    common_norm=False,
    kde=True,
)
plt.savefig('../outputs/hist_of_sleep_quality')
../_images/5188f1e3312a941395f4f676b66552ab8c46575925208d27cda08806c1e5edda.png
# Density histogram of time spent outdoors with a KDE overlay, one series per
# access group.  common_norm=False normalises each group independently.
# The column label really is 'time _outdoors' (with a space), as produced by
# the rename step earlier in the notebook.
sns.histplot(
    health_clean,
    x='time _outdoors',
    hue='access_greenblue_spaces',
    stat="density",
    common_norm=False,
    kde=True,
)
plt.savefig('../outputs/hist_of_outdoor_time')
../_images/317e67971738b8b52f6c537331ebd77aabd24698bea405b3764c16ca6f8f024e.png