Compare commits
No commits in common. "08d9bf66cfde87e8dd023b7e2d29e2033512f4bc" and "88b7d3d03addcb7ab5d5fde6a1d40598316a06e4" have entirely different histories.
08d9bf66cf
...
88b7d3d03a
25
README.md
25
README.md
@ -1,3 +1,24 @@
|
|||||||
# Page View Time Series Visualizer
|
### Assignment
|
||||||
|
|
||||||
This is the boilerplate for the Page View Time Series Visualizer project. Instructions for building your project can be found at https://www.freecodecamp.org/learn/data-analysis-with-python/data-analysis-with-python-projects/page-view-time-series-visualizer
|
For this project you will visualize time series data using a line chart, bar chart, and box plots. You will use Pandas, Matplotlib, and Seaborn to visualize a dataset containing the number of page views each day on the freeCodeCamp.org forum from 2016-05-09 to 2019-12-03. The data visualizations will help you understand the patterns in visits and identify yearly and monthly growth.
|
||||||
|
|
||||||
|
Use the data to complete the following tasks:
|
||||||
|
* Use Pandas to import the data from "fcc-forum-pageviews.csv". Set the index to the "date" column.
|
||||||
|
* Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.
|
||||||
|
* Create a `draw_line_plot` function that uses Matplotlib to draw a line chart similar to "examples/Figure_1.png". The title should be "Daily freeCodeCamp Forum Page Views 5/2016-12/2019". The label on the x axis should be "Date" and the label on the y axis should be "Page Views".
|
||||||
|
* Create a `draw_bar_plot` function that draws a bar chart similar to "examples/Figure_2.png". It should show average daily page views for each month grouped by year. The legend should show month labels and have a title of "Months". On the chart, the label on the x axis should be "Years" and the label on the y axis should be "Average Page Views".
|
||||||
|
* Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to "examples/Figure_3.png". These box plots should show how the values are distributed within a given year or month and how it compares over time. The title of the first chart should be "Year-wise Box Plot (Trend)" and the title of the second chart should be "Month-wise Box Plot (Seasonality)". Make sure the month labels on the bottom start at "Jan" and the x and y axes are labeled correctly.
|
||||||
|
|
||||||
|
For each chart, make sure to use a copy of the data frame. Unit tests are written for you under `test_module.py`.
|
||||||
|
|
||||||
|
### Development
|
||||||
|
|
||||||
|
For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run.
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button.
|
||||||
|
|
||||||
|
### Submitting
|
||||||
|
|
||||||
|
Copy your project's URL and submit it to freeCodeCamp.
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 84 KiB After Width: | Height: | Size: 108 KiB |
@ -3,8 +3,6 @@
|
|||||||
authors = ["Your Name <you@example.com>"]
|
authors = ["Your Name <you@example.com>"]
|
||||||
name = "root"
|
name = "root"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
description = ""
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
pandas = "*"
|
pandas = "*"
|
||||||
python = "^3.7"
|
python = "^3.7"
|
||||||
|
1231
solution.ipynb
1231
solution.ipynb
File diff suppressed because one or more lines are too long
@ -4,7 +4,7 @@ import matplotlib as mpl
|
|||||||
|
|
||||||
class DataCleaningTestCase(unittest.TestCase):
|
class DataCleaningTestCase(unittest.TestCase):
|
||||||
def test_data_cleaning(self):
|
def test_data_cleaning(self):
|
||||||
actual = int(time_series_visualizer.df.count(numeric_only=True))
|
actual = int(time_series_visualizer.df.count())
|
||||||
expected = 1238
|
expected = 1238
|
||||||
self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
|
self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
|
||||||
|
|
||||||
@ -26,7 +26,7 @@ class LinePlotTestCase(unittest.TestCase):
|
|||||||
expected = "Page Views"
|
expected = "Page Views"
|
||||||
self.assertEqual(actual, expected, "Expected line plot ylabel to be 'Page Views'")
|
self.assertEqual(actual, expected, "Expected line plot ylabel to be 'Page Views'")
|
||||||
|
|
||||||
def test_line_plot_data_quantity(self):
|
def test_line_plot_data_quatity(self):
|
||||||
actual = len(self.ax.lines[0].get_ydata())
|
actual = len(self.ax.lines[0].get_ydata())
|
||||||
expected = 1238
|
expected = 1238
|
||||||
self.assertEqual(actual, expected, "Expected number of data points in line plot to be 1238.")
|
self.assertEqual(actual, expected, "Expected number of data points in line plot to be 1238.")
|
||||||
@ -83,10 +83,10 @@ class BoxPlotTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
|
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
|
||||||
actual = self.ax2.get_xlabel()
|
actual = self.ax2.get_xlabel()
|
||||||
expected = "Month"
|
expected = "Month"
|
||||||
self.assertEqual(actual, expected, "Expected box plot 2 xlabel to be 'Month'")
|
self.assertEqual(actual, expected, "Expected box plot 1 xlabel to be 'Month'")
|
||||||
actual = self.ax2.get_ylabel()
|
actual = self.ax2.get_ylabel()
|
||||||
expected = "Page Views"
|
expected = "Page Views"
|
||||||
self.assertEqual(actual, expected, "Expected box plot 2 ylabel to be 'Page Views'")
|
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
|
||||||
actual = []
|
actual = []
|
||||||
for label in self.ax1.get_xaxis().get_majorticklabels():
|
for label in self.ax1.get_xaxis().get_majorticklabels():
|
||||||
actual.append(label.get_text())
|
actual.append(label.get_text())
|
||||||
@ -109,9 +109,9 @@ class BoxPlotTestCase(unittest.TestCase):
|
|||||||
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
|
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
|
||||||
actual = self.ax2.get_title()
|
actual = self.ax2.get_title()
|
||||||
expected = "Month-wise Box Plot (Seasonality)"
|
expected = "Month-wise Box Plot (Seasonality)"
|
||||||
self.assertEqual(actual, expected, "Expected box plot 2 title to be 'Month-wise Box Plot (Seasonality)'")
|
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Month-wise Box Plot (Seasonality)'")
|
||||||
|
|
||||||
def test_box_plot_number_of_boxes(self):
|
def test_box_plot_number_of_boxs(self):
|
||||||
actual = len(self.ax1.lines) / 6 # Every box has 6 lines
|
actual = len(self.ax1.lines) / 6 # Every box has 6 lines
|
||||||
expected = 4
|
expected = 4
|
||||||
self.assertEqual(actual, expected, "Expected four boxes in box plot 1")
|
self.assertEqual(actual, expected, "Expected four boxes in box plot 1")
|
||||||
|
@ -1,112 +1,53 @@
|
|||||||
import calendar
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import seaborn as sns
|
import seaborn as sns
|
||||||
from pandas.plotting import register_matplotlib_converters
|
from pandas.plotting import register_matplotlib_converters
|
||||||
|
|
||||||
register_matplotlib_converters()
|
register_matplotlib_converters()
|
||||||
|
|
||||||
# Import data (Make sure to parse dates. Consider setting index column to 'date'.)
|
# Import data (Make sure to parse dates. Consider setting index column to 'date'.)
|
||||||
df = pd.read_csv("fcc-forum-pageviews.csv")
|
df = None
|
||||||
|
|
||||||
# Clean data
|
# Clean data
|
||||||
df = df[(
|
df = None
|
||||||
df['value'] >= df['value'].quantile(0.025))
|
|
||||||
& (df['value'] <= df['value'].quantile(0.975))]
|
|
||||||
|
|
||||||
|
|
||||||
def draw_line_plot():
|
def draw_line_plot():
|
||||||
# Draw line plot
|
# Draw line plot
|
||||||
fig, axs = plt.subplots(ncols=1, figsize=(20, 10))
|
|
||||||
sns.lineplot(df["value"], estimator=None).set(
|
|
||||||
xlabel="Date",
|
|
||||||
ylabel="Page Views",
|
|
||||||
xticklabels=df.index,
|
|
||||||
title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')
|
|
||||||
|
|
||||||
# Save image and return fig (don't change this part)
|
# Save image and return fig (don't change this part)
|
||||||
fig.savefig('line_plot.png')
|
fig.savefig('line_plot.png')
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def draw_bar_plot():
|
def draw_bar_plot():
|
||||||
df = pd.read_csv(
|
|
||||||
"fcc-forum-pageviews.csv", parse_dates=True, index_col=[0])
|
|
||||||
df = df[
|
|
||||||
(df['value'] >= df['value'].quantile(0.025))
|
|
||||||
& (df['value'] <= df['value'].quantile(0.975))]
|
|
||||||
# Copy and modify data for monthly bar plot
|
# Copy and modify data for monthly bar plot
|
||||||
df['year'] = df.index.year
|
df_bar = None
|
||||||
df['month'] = df.index.month
|
|
||||||
|
|
||||||
# Draw bar plot
|
# Draw bar plot
|
||||||
df.groupby(['year', 'month']).mean()
|
|
||||||
df_bar = df.groupby([df.index.year, df.index.month]).mean()
|
|
||||||
df_bar['month'] = df_bar['month'].apply(
|
|
||||||
lambda x: calendar.month_name[int(x)])
|
|
||||||
df_bar = df_bar.astype({'year': 'int'})
|
|
||||||
fig, axs = plt.subplots(ncols=1, figsize=(20, 10))
|
|
||||||
sns.barplot(
|
|
||||||
data=df_bar,
|
|
||||||
x='year',
|
|
||||||
y='value',
|
|
||||||
hue='month',
|
|
||||||
hue_order=[
|
|
||||||
'January',
|
|
||||||
'February',
|
|
||||||
'March',
|
|
||||||
'April',
|
|
||||||
'May',
|
|
||||||
'June',
|
|
||||||
'July',
|
|
||||||
'August',
|
|
||||||
'September',
|
|
||||||
'October',
|
|
||||||
'November',
|
|
||||||
'December']).set(
|
|
||||||
xlabel='Years', ylabel='Average Page Views')
|
|
||||||
|
|
||||||
# Save image and return fig (don't change this part)
|
# Save image and return fig (don't change this part)
|
||||||
fig.savefig('bar_plot.png')
|
fig.savefig('bar_plot.png')
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
|
|
||||||
def draw_box_plot():
|
def draw_box_plot():
|
||||||
# Prepare data for box plots (this part is done!)
|
# Prepare data for box plots (this part is done!)
|
||||||
df = pd.read_csv("fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
|
|
||||||
df = df[(df['value'] >= df['value'].quantile(0.025))
|
|
||||||
& (df['value'] <= df['value'].quantile(0.975))]
|
|
||||||
df_box = df.copy()
|
df_box = df.copy()
|
||||||
df_box.reset_index(inplace=True)
|
df_box.reset_index(inplace=True)
|
||||||
df_box['year'] = [d.year for d in df_box.date]
|
df_box['year'] = [d.year for d in df_box.date]
|
||||||
df_box['month'] = [d.strftime('%b') for d in df_box.date]
|
df_box['month'] = [d.strftime('%b') for d in df_box.date]
|
||||||
|
|
||||||
# Draw box plots (using Seaborn)
|
# Draw box plots (using Seaborn)
|
||||||
fig, axs = plt.subplots(ncols=2, figsize=(24, 10))
|
|
||||||
sns.boxplot(data=df_box, ax=axs[0], x="year", y="value").set(
|
|
||||||
title="Year-wise Box Plot (Trend)", ylabel="Page Views", xlabel="Year")
|
|
||||||
sns.boxplot(
|
|
||||||
data=df_box,
|
|
||||||
ax=axs[1],
|
|
||||||
x="month",
|
|
||||||
y="value",
|
|
||||||
order=[
|
|
||||||
"Jan",
|
|
||||||
"Feb",
|
|
||||||
"Mar",
|
|
||||||
"Apr",
|
|
||||||
"May",
|
|
||||||
"Jun",
|
|
||||||
"Jul",
|
|
||||||
"Aug",
|
|
||||||
"Sep",
|
|
||||||
"Oct",
|
|
||||||
"Nov",
|
|
||||||
"Dec"]).set(
|
|
||||||
title="Month-wise Box Plot (Seasonality)",
|
|
||||||
ylabel="Page Views",
|
|
||||||
xlabel="Month")
|
|
||||||
|
|
||||||
# Save image and return fig (don't change this part)
|
# Save image and return fig (don't change this part)
|
||||||
fig.savefig('box_plot.png')
|
fig.savefig('box_plot.png')
|
||||||
|
Loading…
Reference in New Issue
Block a user