You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
3.3 KiB
Python

11 months ago
import calendar
4 years ago
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
11 months ago
4 years ago
register_matplotlib_converters()
# Import data: parse the 'date' column and use it as the index, so the
# module-level frame carries real timestamps instead of a positional index
# with the dates left as strings.
df = pd.read_csv(
    "fcc-forum-pageviews.csv", parse_dates=["date"], index_col="date")

# Clean data: keep only rows whose page-view count lies between the 2.5th
# and 97.5th percentiles (both bounds inclusive), trimming both tails.
df = df[
    (df['value'] >= df['value'].quantile(0.025))
    & (df['value'] <= df['value'].quantile(0.975))]
4 years ago
def draw_line_plot():
    """Draw the daily page-view line chart, save it and return the figure.

    Plots the ``value`` column of the module-level ``df`` against its dates,
    writes the chart to ``line_plot.png`` and returns the Matplotlib figure.
    """
    # Plot against real dates. If the module-level frame still holds the
    # dates in a plain 'date' column rather than a DatetimeIndex, index a
    # derived frame by the parsed dates; the module-level frame is not
    # modified.
    views = df
    if not isinstance(views.index, pd.DatetimeIndex):
        views = views.set_index(pd.to_datetime(views["date"]))

    # Draw line plot
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    # estimator=None draws every observation rather than aggregating
    # repeated x values. `data` and `ax` are passed by keyword so the call
    # does not depend on positional-argument order or on the current axes.
    sns.lineplot(data=views["value"], estimator=None, ax=ax)
    # Tick labels are deliberately left to Matplotlib: forcing them to the
    # whole index assigns the first few index values to whatever ticks the
    # locator picked, which mislabels the axis.
    ax.set(
        xlabel="Date",
        ylabel="Page Views",
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
11 months ago
4 years ago
def draw_bar_plot():
    """Draw the average-page-views-per-month bar chart, grouped by year.

    Reads ``fcc-forum-pageviews.csv``, drops rows outside the 2.5th-97.5th
    percentile range of ``value``, averages ``value`` per (year, month),
    saves the chart to ``bar_plot.png`` and returns the Matplotlib figure.
    """
    df = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=[0])
    df = df[
        (df['value'] >= df['value'].quantile(0.025))
        & (df['value'] <= df['value'].quantile(0.975))]

    # Copy and modify data for monthly bar plot.
    # Only 'value' is aggregated; the year/month keys come straight from the
    # index, so they stay integers and need no cast back after the mean.
    df_bar = (
        df['value']
        .groupby([df.index.year, df.index.month])
        .mean()
        .rename_axis(['year', 'month'])
        .reset_index())

    # Month labels and their display order are taken from the same source
    # (calendar.month_name, entries 1..12) so they cannot drift apart.
    month_names = list(calendar.month_name[1:])
    df_bar['month'] = df_bar['month'].map(
        lambda number: calendar.month_name[int(number)])

    # Draw bar plot
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    sns.barplot(
        data=df_bar,
        x='year',
        y='value',
        hue='month',
        hue_order=month_names,
        ax=ax)
    ax.set(xlabel='Years', ylabel='Average Page Views')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
11 months ago
4 years ago
def draw_box_plot():
    """Draw year-wise and month-wise box plots of page views.

    Reads ``fcc-forum-pageviews.csv``, keeps rows whose ``value`` lies within
    the 2.5th-97.5th percentile range, draws two side-by-side box plots
    (by year, by month), saves them to ``box_plot.png`` and returns the
    Matplotlib figure.
    """
    # Prepare data for box plots
    pageviews = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
    lower = pageviews['value'].quantile(0.025)
    upper = pageviews['value'].quantile(0.975)
    in_range = (pageviews['value'] >= lower) & (pageviews['value'] <= upper)

    # reset_index returns a new frame with the date index moved into a
    # 'date' column, from which the year and abbreviated month are derived.
    df_box = pageviews[in_range].reset_index()
    df_box['year'] = df_box['date'].dt.year
    df_box['month'] = df_box['date'].dt.strftime('%b')

    # Fixed calendar order for the month axis.
    month_order = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    # Draw box plots (using Seaborn)
    fig, axs = plt.subplots(ncols=2, figsize=(24, 10))
    year_ax, month_ax = axs[0], axs[1]

    sns.boxplot(data=df_box, ax=year_ax, x="year", y="value")
    year_ax.set(
        title="Year-wise Box Plot (Trend)",
        ylabel="Page Views",
        xlabel="Year")

    sns.boxplot(
        data=df_box, ax=month_ax, x="month", y="value", order=month_order)
    month_ax.set(
        title="Month-wise Box Plot (Seasonality)",
        ylabel="Page Views",
        xlabel="Month")

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    return fig