68 lines
2.4 KiB
Python
68 lines
2.4 KiB
Python
import pandas as pd
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
|
|
# Import data
|
|
df = pd.read_csv("medical_examination.csv")
|
|
|
|
# Add 'overweight' column
|
|
df['overweight'] = (df['weight']/((df['height']/100)**2) > 25).astype('int8')
|
|
|
|
# Normalize data by making 0 always good and 1 always bad. If the value of 'cholesterol' or 'gluc' is 1, make the value 0. If the value is more than 1, make the value 1.
|
|
df['cholesterol'] = (df['cholesterol'] != 1).astype('int8')
|
|
df['gluc'] = (df['gluc'] != 1).astype('int8')
|
|
|
|
|
|
# Draw Categorical Plot
|
|
def draw_cat_plot():
|
|
# Create DataFrame for cat plot using `pd.melt` using just the values from 'cholesterol', 'gluc', 'smoke', 'alco', 'active', and 'overweight'.
|
|
df0 = df[df['cardio'] == 0]
|
|
df0long = pd.melt(df0, value_vars=['active', 'alco', 'cholesterol', 'gluc', 'overweight', 'smoke'])
|
|
df1 = df[df['cardio'] == 1]
|
|
df1long = pd.melt(df1, value_vars=['active', 'alco', 'cholesterol', 'gluc', 'overweight', 'smoke'])
|
|
|
|
|
|
# Draw the catplot with 'sns.catplot()'
|
|
fig, axs = plt.subplots(ncols=2, figsize=(15, 5))
|
|
plot0 = sns.countplot(data=df0long, x='variable', hue='value', ax=axs[0]).set(title='cadio = 0', ylabel="total")
|
|
axs[0].legend([],[], frameon=False)
|
|
plot1 = sns.countplot(data=df1long, x='variable', hue='value', ax=axs[1]).set(title='cadio = 1', ylabel="total")
|
|
sns.move_legend(axs[1], "right", bbox_to_anchor=(1.15, 0.5))
|
|
|
|
# Do not modify the next two lines
|
|
fig.savefig('catplot.png')
|
|
return fig
|
|
|
|
|
|
# Draw Heat Map
|
|
def draw_heat_map():
|
|
# Clean the data
|
|
global df
|
|
df = df[df['ap_lo'] <= df['ap_hi']]
|
|
df = df[(df['height'] >= df['height'].quantile(0.025)) & (df['height'] <= df['height'].quantile(0.975))]
|
|
df = df[(df['weight'] >= df['weight'].quantile(0.025)) & (df['weight'] <= df['weight'].quantile(0.975))]
|
|
|
|
# Calculate the correlation matrix
|
|
def abs_zero(x):
|
|
for i in range(len(x)):
|
|
if x[i] == 0:
|
|
x[i] = abs(x[i])
|
|
return x
|
|
corr = df.corr().round(decimals=1).transform(abs_zero)
|
|
|
|
# Generate a mask for the upper triangle
|
|
mask = np.triu(np.ones_like(corr, dtype=bool))
|
|
|
|
|
|
# Set up the matplotlib figure
|
|
fig, axs = plt.subplots(ncols=1, figsize=(10, 10))
|
|
|
|
# Draw the heatmap with 'sns.heatmap()'
|
|
sns.heatmap(corr, mask=mask, annot=True, fmt=".1f")
|
|
|
|
|
|
# Do not modify the next two lines
|
|
fig.savefig('heatmap.png')
|
|
return fig
|