Compare commits

...

10 Commits

Author SHA1 Message Date
08d9bf66cf Solution 2023-06-10 09:30:54 +10:00
Prince Mendiratta
c097d7f434
fix: remove .replit and replit.nix files (#11)
Signed-off-by: Prince Mendiratta <prince.mendi@gmail.com>

Signed-off-by: Prince Mendiratta <prince.mendi@gmail.com>
2022-12-03 11:06:25 +09:00
Manabu Matsumoto
d872ea0320
fix: change the numbers in msg argument of test (#10) 2022-08-23 12:15:44 -07:00
Krzysztof G
37608af239
fix(config): add replit.nix config (#9) 2022-07-20 15:13:30 +09:00
Naomi Carrigan
da0977ed06
chore: clean up readme (#8) 2022-05-03 17:56:32 -05:00
PBM
aa5a4f29bd
fix: typos in test methods names (#7) 2021-07-15 07:16:11 +07:00
Kate
c9ad486335
fix: update box plot example image to include full data (#5) 2021-06-02 20:08:05 +05:30
Jaikishan Brijwani
5a2c48afe4
proposed fix to #39244 (#3)
* proposed fix to #39244

* Update test_module.py
2020-10-09 14:48:41 +09:00
Tom
3f27096a3e
feat: add description to pyproject.toml (#2) 2020-09-29 15:46:16 -07:00
Tom
670401d15b
feat: add replit config (#1) 2020-09-29 15:10:03 -07:00
6 changed files with 1315 additions and 44 deletions

View File

@ -1,24 +1,3 @@
### Assignment
# Page View Time Series Visualizer
For this project you will visualize time series data using a line chart, bar chart, and box plots. You will use Pandas, Matplotlib, and Seaborn to visualize a dataset containing the number of page views each day on the freeCodeCamp.org forum from 2016-05-09 to 2019-12-03. The data visualizations will help you understand the patterns in visits and identify yearly and monthly growth.
Use the data to complete the following tasks:
* Use Pandas to import the data from "fcc-forum-pageviews.csv". Set the index to the "date" column.
* Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.
* Create a `draw_line_plot` function that uses Matplotlib to draw a line chart similar to "examples/Figure_1.png". The title should be "Daily freeCodeCamp Forum Page Views 5/2016-12/2019". The label on the x axis should be "Date" and the label on the y axis should be "Page Views".
* Create a `draw_bar_plot` function that draws a bar chart similar to "examples/Figure_2.png". It should show average daily page views for each month grouped by year. The legend should show month labels and have a title of "Months". On the chart, the label on the x axis should be "Years" and the label on the y axis should be "Average Page Views".
* Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to "examples/Figure_3.png". These box plots should show how the values are distributed within a given year or month and how it compares over time. The title of the first chart should be "Year-wise Box Plot (Trend)" and the title of the second chart should be "Month-wise Box Plot (Seasonality)". Make sure the month labels on bottom start at "Jan" and the x and y axis are labeled correctly.
For each chart, make sure to use a copy of the data frame. Unit tests are written for you under `test_module.py`.
### Development
For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run.
### Testing
We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button.
### Submitting
Copy your project's URL and submit it to freeCodeCamp.
This is the boilerplate for the Page View Time Series Visualizer project. Instructions for building your project can be found at https://www.freecodecamp.org/learn/data-analysis-with-python/data-analysis-with-python-projects/page-view-time-series-visualizer

Binary file not shown.

Before

Width:  |  Height:  |  Size: 108 KiB

After

Width:  |  Height:  |  Size: 84 KiB

View File

@ -3,6 +3,8 @@
authors = ["Your Name <you@example.com>"]
name = "root"
version = "0.0.0"
description = ""
[tool.poetry.dependencies]
pandas = "*"
python = "^3.7"

1231
solution.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -4,7 +4,7 @@ import matplotlib as mpl
class DataCleaningTestCase(unittest.TestCase):
def test_data_cleaning(self):
actual = int(time_series_visualizer.df.count())
actual = int(time_series_visualizer.df.count(numeric_only=True))
expected = 1238
self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
@ -26,7 +26,7 @@ class LinePlotTestCase(unittest.TestCase):
expected = "Page Views"
self.assertEqual(actual, expected, "Expected line plot ylabel to be 'Page Views'")
def test_line_plot_data_quatity(self):
def test_line_plot_data_quantity(self):
actual = len(self.ax.lines[0].get_ydata())
expected = 1238
self.assertEqual(actual, expected, "Expected number of data points in line plot to be 1238.")
@ -83,10 +83,10 @@ class BoxPlotTestCase(unittest.TestCase):
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
actual = self.ax2.get_xlabel()
expected = "Month"
self.assertEqual(actual, expected, "Expected box plot 1 xlabel to be 'Month'")
self.assertEqual(actual, expected, "Expected box plot 2 xlabel to be 'Month'")
actual = self.ax2.get_ylabel()
expected = "Page Views"
self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
self.assertEqual(actual, expected, "Expected box plot 2 ylabel to be 'Page Views'")
actual = []
for label in self.ax1.get_xaxis().get_majorticklabels():
actual.append(label.get_text())
@ -109,9 +109,9 @@ class BoxPlotTestCase(unittest.TestCase):
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
actual = self.ax2.get_title()
expected = "Month-wise Box Plot (Seasonality)"
self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Month-wise Box Plot (Seasonality)'")
self.assertEqual(actual, expected, "Expected box plot 2 title to be 'Month-wise Box Plot (Seasonality)'")
def test_box_plot_number_of_boxs(self):
def test_box_plot_number_of_boxes(self):
actual = len(self.ax1.lines) / 6 # Every box has 6 lines
expected = 4
self.assertEqual(actual, expected, "Expected four boxes in box plot 1")

View File

@ -1,53 +1,112 @@
import calendar
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
# Import data: parse the "date" column and use it as the index so that
# date-based operations (plotting over time, year/month grouping) work on
# real timestamps instead of strings.
df = pd.read_csv(
    "fcc-forum-pageviews.csv", parse_dates=["date"], index_col="date")

# Clean data: keep only days whose page views lie between the 2.5th and
# 97.5th percentiles (bounds inclusive), dropping the extreme tails.
df = df[df["value"].between(
    df["value"].quantile(0.025), df["value"].quantile(0.975))]
def draw_line_plot():
    """Draw the daily page-view line chart and save it as 'line_plot.png'.

    Works on a copy of the module-level ``df`` so the shared frame is never
    modified.

    Returns:
        The Matplotlib figure containing the line chart.
    """
    data = df.copy()
    # The dates may live either in the index or in a plain "date" column,
    # depending on how the module-level frame was loaded; normalise to a
    # datetime index so the x axis is a real time axis.
    if "date" in data.columns:
        data = data.set_index("date")
    data.index = pd.to_datetime(data.index)

    # Draw line plot on an explicit axes. Tick labels are left to
    # Matplotlib's date locator/formatter instead of being forced to the
    # raw index values, which mislabels the ticks.
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    ax.plot(data.index, data["value"])
    ax.set(
        xlabel="Date",
        ylabel="Page Views",
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
def draw_bar_plot():
    """Draw average daily page views per month, grouped by year.

    Reloads "fcc-forum-pageviews.csv" with a parsed date index, applies the
    same 2.5%/97.5% quantile filter used elsewhere in this file, and saves
    the chart as 'bar_plot.png'.

    Returns:
        The Matplotlib figure containing the bar chart.
    """
    data = pd.read_csv(
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=[0])
    # .copy() so the year/month columns are added to an independent frame
    # rather than to a filtered view of the loaded data.
    data = data[
        (data['value'] >= data['value'].quantile(0.025))
        & (data['value'] <= data['value'].quantile(0.975))].copy()

    # Copy and modify data for monthly bar plot
    data['year'] = data.index.year
    data['month'] = data.index.month
    df_bar = data.groupby(['year', 'month'], as_index=False)['value'].mean()

    # Month labels and their legend order come from the same source so the
    # hue values always match hue_order.
    month_names = calendar.month_name[1:]
    df_bar['month'] = df_bar['month'].map(
        lambda m: calendar.month_name[int(m)])

    # Draw bar plot
    fig, ax = plt.subplots(ncols=1, figsize=(20, 10))
    sns.barplot(
        data=df_bar,
        x='year',
        y='value',
        hue='month',
        hue_order=month_names,
        ax=ax)
    ax.set(xlabel='Years', ylabel='Average Page Views')
    # Seaborn titles the legend after the hue column ("month"); the
    # assignment asks for "Months".
    legend = ax.get_legend()
    if legend is not None:
        legend.set_title('Months')

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
def draw_box_plot():
# Prepare data for box plots (this part is done!)
df = pd.read_csv("fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
df = df[(df['value'] >= df['value'].quantile(0.025))
& (df['value'] <= df['value'].quantile(0.975))]
df_box = df.copy()
df_box.reset_index(inplace=True)
df_box['year'] = [d.year for d in df_box.date]
df_box['month'] = [d.strftime('%b') for d in df_box.date]
# Draw box plots (using Seaborn)
fig, axs = plt.subplots(ncols=2, figsize=(24, 10))
sns.boxplot(data=df_box, ax=axs[0], x="year", y="value").set(
title="Year-wise Box Plot (Trend)", ylabel="Page Views", xlabel="Year")
sns.boxplot(
data=df_box,
ax=axs[1],
x="month",
y="value",
order=[
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec"]).set(
title="Month-wise Box Plot (Seasonality)",
ylabel="Page Views",
xlabel="Month")
# Save image and return fig (don't change this part)
fig.savefig('box_plot.png')