Compare commits

...

10 Commits

Author SHA1 Message Date
Manish 08d9bf66cf Solution 11 months ago
Prince Mendiratta c097d7f434
fix: remove .replit and replit.nix files (#11)
Signed-off-by: Prince Mendiratta <prince.mendi@gmail.com>

Signed-off-by: Prince Mendiratta <prince.mendi@gmail.com>
1 year ago
Manabu Matsumoto d872ea0320
fix: change the numbers in msg argument of test (#10) 2 years ago
Krzysztof G 37608af239
fix(config): add replit.nix config (#9) 2 years ago
Naomi Carrigan da0977ed06
chore: clean up readme (#8) 2 years ago
PBM aa5a4f29bd
fix: typos in test methods names (#7) 3 years ago
Kate c9ad486335
fix: update box plot example image to include full data (#5) 3 years ago
Jaikishan Brijwani 5a2c48afe4
proposed fix to #39244 (#3)
* proposed fix to #39244

* Update test_module.py
4 years ago
Tom 3f27096a3e
feat: add description to pyproject.toml (#2) 4 years ago
Tom 670401d15b
feat: add replit config (#1) 4 years ago

@ -1,24 +1,3 @@
### Assignment
# Page View Time Series Visualizer
For this project you will visualize time series data using a line chart, bar chart, and box plots. You will use Pandas, Matplotlib, and Seaborn to visualize a dataset containing the number of page views each day on the freeCodeCamp.org forum from 2016-05-09 to 2019-12-03. The data visualizations will help you understand the patterns in visits and identify yearly and monthly growth.
Use the data to complete the following tasks:
* Use Pandas to import the data from "fcc-forum-pageviews.csv". Set the index to the "date" column.
* Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.
* Create a `draw_line_plot` function that uses Matplotlib to draw a line chart similar to "examples/Figure_1.png". The title should be "Daily freeCodeCamp Forum Page Views 5/2016-12/2019". The label on the x axis should be "Date" and the label on the y axis should be "Page Views".
* Create a `draw_bar_plot` function that draws a bar chart similar to "examples/Figure_2.png". It should show average daily page views for each month grouped by year. The legend should show month labels and have a title of "Months". On the chart, the label on the x axis should be "Years" and the label on the y axis should be "Average Page Views".
* Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to "examples/Figure_3.png". These box plots should show how the values are distributed within a given year or month and how it compares over time. The title of the first chart should be "Year-wise Box Plot (Trend)" and the title of the second chart should be "Month-wise Box Plot (Seasonality)". Make sure the month labels on bottom start at "Jan" and the x and y axis are labeled correctly.
For each chart, make sure to use a copy of the data frame. Unit tests are written for you under `test_module.py`.
### Development
For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run.
### Testing
We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button.
### Submitting
Copy your project's URL and submit it to freeCodeCamp.
This is the boilerplate for the Page View Time Series Visualizer project. Instructions for building your project can be found at https://www.freecodecamp.org/learn/data-analysis-with-python/data-analysis-with-python-projects/page-view-time-series-visualizer

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

After

Width:  |  Height:  |  Size: 84 KiB

@ -3,6 +3,8 @@
authors = ["Your Name <you@example.com>"]
name = "root"
version = "0.0.0"
description = ""
[tool.poetry.dependencies]
pandas = "*"
python = "^3.7"

File diff suppressed because one or more lines are too long

@ -4,7 +4,7 @@ import matplotlib as mpl
class DataCleaningTestCase(unittest.TestCase):
def test_data_cleaning(self):
actual = int(time_series_visualizer.df.count())
actual = int(time_series_visualizer.df.count(numeric_only=True))
expected = 1238
self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
@ -26,7 +26,7 @@ class LinePlotTestCase(unittest.TestCase):
expected = "Page Views"
self.assertEqual(actual, expected, "Expected line plot ylabel to be 'Page Views'")
def test_line_plot_data_quatity(self):
def test_line_plot_data_quantity(self):
actual = len(self.ax.lines[0].get_ydata())
expected = 1238
self.assertEqual(actual, expected, "Expected number of data points in line plot to be 1238.")
@ -83,10 +83,10 @@ class BoxPlotTestCase(unittest.TestCase):
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
actual = self.ax2.get_xlabel()
expected = "Month"
self.assertEqual(actual, expected, "Expected box plot 1 xlabel to be 'Month'")
self.assertEqual(actual, expected, "Expected box plot 2 xlabel to be 'Month'")
actual = self.ax2.get_ylabel()
expected = "Page Views"
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
self.assertEqual(actual, expected, "Expected box plot 2 ylabel to be 'Page Views'")
actual = []
for label in self.ax1.get_xaxis().get_majorticklabels():
actual.append(label.get_text())
@ -109,9 +109,9 @@ class BoxPlotTestCase(unittest.TestCase):
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
actual = self.ax2.get_title()
expected = "Month-wise Box Plot (Seasonality)"
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Month-wise Box Plot (Seasonality)'")
self.assertEqual(actual, expected, "Expected box plot 2 title to be 'Month-wise Box Plot (Seasonality)'")
def test_box_plot_number_of_boxs(self):
def test_box_plot_number_of_boxes(self):
actual = len(self.ax1.lines) / 6 # Every box has 6 lines
expected = 4
self.assertEqual(actual, expected, "Expected four boxes in box plot 1")

@ -1,53 +1,112 @@
import calendar
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Import data: parse the 'date' column and use it as the index so the frame
# can be plotted and grouped by date directly.
df = pd.read_csv(
    "fcc-forum-pageviews.csv", parse_dates=["date"], index_col="date")

# Clean data: keep only the days whose page views lie between the 2.5th and
# 97.5th percentiles (both bounds inclusive). The bounds are computed once,
# on the unfiltered data, before the mask is applied.
_lower_bound = df["value"].quantile(0.025)
_upper_bound = df["value"].quantile(0.975)
df = df[(df["value"] >= _lower_bound) & (df["value"] <= _upper_bound)]
def draw_line_plot():
    """Draw the daily page-view line chart and save it as 'line_plot.png'.

    Works on a copy of the module-level ``df`` so the shared frame is never
    modified.

    Returns:
        The Matplotlib figure containing the line chart.
    """
    data = df.copy()
    # If 'date' is still a regular column (not the index), index by the
    # parsed dates so the x axis shows real dates instead of row labels.
    if "date" in data.columns:
        data.index = pd.to_datetime(data["date"])

    # Draw line plot on the axes created here rather than on whatever the
    # implicit "current axes" happens to be.
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    ax.plot(data.index, data["value"])
    # Tick labels are left to Matplotlib: forcing them to the raw index
    # values would attach labels to ticks they do not correspond to.
    ax.set(
        xlabel="Date",
        ylabel="Page Views",
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
def draw_bar_plot():
    """Draw the monthly-average bar chart and save it as 'bar_plot.png'.

    Reads "fcc-forum-pageviews.csv", drops the days outside the 2.5th-97.5th
    percentile range of 'value', averages the remaining daily values per
    (year, month) and plots one bar per month, grouped by year.

    Returns:
        The Matplotlib figure containing the bar chart.
    """
    data = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=[0])
    lower_bound = data['value'].quantile(0.025)
    upper_bound = data['value'].quantile(0.975)
    # .copy() so the new columns below are added to an independent frame,
    # not to a filtered view of the freshly read data.
    data = data[
        (data['value'] >= lower_bound)
        & (data['value'] <= upper_bound)].copy()

    # Copy and modify data for monthly bar plot
    data['year'] = data.index.year
    data['month'] = data.index.month
    df_bar = data.groupby(['year', 'month'], as_index=False)['value'].mean()

    # calendar.month_name[0] is an empty placeholder; entries 1..12 are the
    # month names in calendar order, which is also the legend order.
    month_names = calendar.month_name[1:]
    df_bar['month'] = df_bar['month'].apply(
        lambda number: calendar.month_name[int(number)])

    # Draw bar plot
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    sns.barplot(
        data=df_bar,
        x='year',
        y='value',
        hue='month',
        hue_order=month_names,
        ax=ax)
    ax.set(xlabel='Years', ylabel='Average Page Views')
    legend = ax.get_legend()
    if legend is not None:
        legend.set_title('Months')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
def draw_box_plot():
    """Draw the year-wise and month-wise box plots and save 'box_plot.png'.

    Reads "fcc-forum-pageviews.csv", drops the days outside the 2.5th-97.5th
    percentile range of 'value', and draws two side-by-side Seaborn box
    plots: values per year (left) and values per month (right).

    Returns:
        The Matplotlib figure containing both box plots.
    """
    # Prepare data for box plots
    data = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
    lower_bound = data['value'].quantile(0.025)
    upper_bound = data['value'].quantile(0.975)
    data = data[
        (data['value'] >= lower_bound) & (data['value'] <= upper_bound)]

    df_box = data.copy()
    df_box.reset_index(inplace=True)
    df_box['year'] = df_box['date'].dt.year
    df_box['month'] = df_box['date'].dt.strftime('%b')

    # Abbreviated month names in calendar order; index 0 of month_abbr is an
    # empty placeholder. These come from the same locale data as '%b' above.
    month_order = calendar.month_abbr[1:]

    # Draw box plots (using Seaborn)
    fig, (year_ax, month_ax) = plt.subplots(ncols=2, figsize=(24, 10))
    sns.boxplot(data=df_box, ax=year_ax, x="year", y="value")
    year_ax.set(
        title="Year-wise Box Plot (Trend)",
        ylabel="Page Views",
        xlabel="Year")
    sns.boxplot(
        data=df_box, ax=month_ax, x="month", y="value", order=month_order)
    month_ax.set(
        title="Month-wise Box Plot (Seasonality)",
        ylabel="Page Views",
        xlabel="Month")

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    # Return the figure like the other draw_* functions so callers can
    # inspect its axes.
    return fig

Loading…
Cancel
Save