You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

114 lines
3.3 KiB
Python

import calendar
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Import data: parse the 'date' column and use it as the index, so that the
# module-level frame carries a DatetimeIndex for time-series plotting.
df = pd.read_csv(
    "fcc-forum-pageviews.csv", parse_dates=["date"], index_col="date")

# Clean data: keep only the days whose page views lie between the 2.5th and
# 97.5th percentiles (both bounds inclusive).
df = df[
    (df['value'] >= df['value'].quantile(0.025))
    & (df['value'] <= df['value'].quantile(0.975))]
def draw_line_plot():
    """Draw the daily page-view line plot and save it as ``line_plot.png``.

    Reads the module-level ``df`` (the cleaned page-view data) and plots its
    ``value`` column against the observation dates.

    Returns:
        The matplotlib figure holding the line plot.
    """
    # Work with either layout of the module-level frame: dates kept in a
    # 'date' column, or dates already used as the index.
    raw_dates = df["date"] if "date" in df.columns else df.index
    dates = pd.DatetimeIndex(pd.to_datetime(raw_dates))

    # Draw line plot on an explicitly created axes; x and y are passed by
    # keyword so the call does not rely on positional-argument order.
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    sns.lineplot(x=dates, y=df["value"].to_numpy(), estimator=None, ax=ax)

    # Tick labels are left to matplotlib's date formatter. Passing one label
    # per row via ``xticklabels`` would hand the labels to the (far fewer)
    # ticks in order, so they would not correspond to the tick positions.
    ax.set(
        xlabel="Date",
        ylabel="Page Views",
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
def draw_bar_plot():
    """Draw average daily page views per month, grouped by year.

    Loads ``fcc-forum-pageviews.csv``, drops the days outside the
    2.5th-97.5th percentile range of ``value``, averages the remaining values
    per (year, month) and saves a grouped bar chart as ``bar_plot.png``.

    Returns:
        The matplotlib figure holding the bar plot.
    """
    data = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
    views = data['value']
    data = data[
        (views >= views.quantile(0.025)) & (views <= views.quantile(0.975))]

    # Aggregate straight from the DatetimeIndex: one mean per (year, month).
    # No helper columns are written onto the filtered frame, and the group
    # keys come back as integer 'year' / 'month' columns.
    df_bar = (
        data.groupby([data.index.year, data.index.month])['value']
        .mean()
        .rename_axis(['year', 'month'])
        .reset_index())
    df_bar['month'] = df_bar['month'].map(
        lambda number: calendar.month_name[int(number)])

    # Draw bar plot. The legend order is taken from the same calendar table
    # that produced the labels above, so the two cannot disagree.
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    sns.barplot(
        data=df_bar,
        x='year',
        y='value',
        hue='month',
        hue_order=calendar.month_name[1:],
        ax=ax)
    ax.set(xlabel='Years', ylabel='Average Page Views')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
def draw_box_plot():
    """Draw year-wise and month-wise box plots of daily page views.

    Loads ``fcc-forum-pageviews.csv``, keeps the days whose ``value`` lies
    within the 2.5th-97.5th percentile range, and saves two side-by-side box
    plots (per year, per calendar month) as ``box_plot.png``.

    Returns:
        The matplotlib figure holding both box plots.
    """
    # Load the data with the first column parsed as a date index, then trim
    # the extreme days on both ends.
    pageviews = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
    lower = pageviews['value'].quantile(0.025)
    upper = pageviews['value'].quantile(0.975)
    in_range = (pageviews['value'] >= lower) & (pageviews['value'] <= upper)

    # Move the dates back into a column and derive the grouping keys.
    box_data = pageviews[in_range].reset_index()
    box_data['year'] = box_data['date'].dt.year
    box_data['month'] = box_data['date'].dt.strftime('%b')

    month_order = [
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    # Draw box plots (using Seaborn): trend on the left, seasonality on the
    # right.
    fig, (year_ax, month_ax) = plt.subplots(ncols=2, figsize=(24, 10))

    sns.boxplot(data=box_data, ax=year_ax, x="year", y="value")
    year_ax.set(
        title="Year-wise Box Plot (Trend)",
        ylabel="Page Views",
        xlabel="Year")

    sns.boxplot(
        data=box_data, ax=month_ax, x="month", y="value", order=month_order)
    month_ax.set(
        title="Month-wise Box Plot (Seasonality)",
        ylabel="Page Views",
        xlabel="Month")

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    return fig