Compare commits
	
		
			10 Commits
		
	
	
		
			88b7d3d03a
			...
			08d9bf66cf
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 08d9bf66cf | |||
| 
						 | 
					c097d7f434 | ||
| 
						 | 
					d872ea0320 | ||
| 
						 | 
					37608af239 | ||
| 
						 | 
					da0977ed06 | ||
| 
						 | 
					aa5a4f29bd | ||
| 
						 | 
					c9ad486335 | ||
| 
						 | 
					5a2c48afe4 | ||
| 
						 | 
					3f27096a3e | ||
| 
						 | 
					670401d15b | 
							
								
								
									
										25
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								README.md
									
									
									
									
									
								
							@ -1,24 +1,3 @@
 | 
			
		||||
### Assignment
 | 
			
		||||
# Page View Time Series Visualizer
 | 
			
		||||
 | 
			
		||||
For this project you will visualize time series data using a line chart, bar chart, and box plots. You will use Pandas, Matplotlib, and Seaborn to visualize a dataset containing the number of page views each day on the freeCodeCamp.org forum from 2016-05-09 to 2019-12-03. The data visualizations will help you understand the patterns in visits and identify yearly and monthly growth.
 | 
			
		||||
 | 
			
		||||
Use the data to complete the following tasks:
 | 
			
		||||
* Use Pandas to import the data from "fcc-forum-pageviews.csv". Set the index to the "date" column.
 | 
			
		||||
* Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.
 | 
			
		||||
* Create a `draw_line_plot` function that uses Matplotlib to draw a line chart similar to "examples/Figure_1.png". The title should be "Daily freeCodeCamp Forum Page Views 5/2016-12/2019". The label on the x axis should be "Date" and the label on the y axis should be "Page Views".
 | 
			
		||||
* Create a `draw_bar_plot` function that draws a bar chart similar to "examples/Figure_2.png". It should show average daily page views for each month grouped by year. The legend should show month labels and have a title of "Months". On the chart, the label on the x axis should be "Years" and the label on the y axis should be "Average Page Views".
 | 
			
		||||
* Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to "examples/Figure_3.png". These box plots should show how the values are distributed within a given year or month and how it compares over time. The title of the first chart should be "Year-wise Box Plot (Trend)" and the title of the second chart should be "Month-wise Box Plot (Seasonality)". Make sure the month labels on bottom start at "Jan" and the x and y axes are labeled correctly.
 | 
			
		||||
 | 
			
		||||
For each chart, make sure to use a copy of the data frame. Unit tests are written for you in `test_module.py`.
 | 
			
		||||
 | 
			
		||||
### Development
 | 
			
		||||
 | 
			
		||||
For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run.
 | 
			
		||||
 | 
			
		||||
### Testing 
 | 
			
		||||
 | 
			
		||||
We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button.
 | 
			
		||||
 | 
			
		||||
### Submitting
 | 
			
		||||
 | 
			
		||||
Copy your project's URL and submit it to freeCodeCamp.
 | 
			
		||||
This is the boilerplate for the Page View Time Series Visualizer project. Instructions for building your project can be found at https://www.freecodecamp.org/learn/data-analysis-with-python/data-analysis-with-python-projects/page-view-time-series-visualizer
 | 
			
		||||
 | 
			
		||||
										
											Binary file not shown.
										
									
								
							| 
		 Before Width: | Height: | Size: 108 KiB After Width: | Height: | Size: 84 KiB  | 
@ -3,6 +3,8 @@
 | 
			
		||||
authors = ["Your Name <you@example.com>"]
 | 
			
		||||
name = "root"
 | 
			
		||||
version = "0.0.0"
 | 
			
		||||
description = ""
 | 
			
		||||
 | 
			
		||||
[tool.poetry.dependencies]
 | 
			
		||||
pandas = "*"
 | 
			
		||||
python = "^3.7"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1231
									
								
								solution.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1231
									
								
								solution.ipynb
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							@ -4,7 +4,7 @@ import matplotlib as mpl
 | 
			
		||||
 | 
			
		||||
class DataCleaningTestCase(unittest.TestCase):
 | 
			
		||||
    def test_data_cleaning(self):
 | 
			
		||||
        actual = int(time_series_visualizer.df.count())
 | 
			
		||||
        actual = int(time_series_visualizer.df.count(numeric_only=True))
 | 
			
		||||
        expected = 1238
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
 | 
			
		||||
 | 
			
		||||
@ -26,7 +26,7 @@ class LinePlotTestCase(unittest.TestCase):
 | 
			
		||||
        expected = "Page Views"
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected line plot ylabel to be 'Page Views'")
 | 
			
		||||
 | 
			
		||||
    def test_line_plot_data_quatity(self):
 | 
			
		||||
    def test_line_plot_data_quantity(self):
 | 
			
		||||
        actual = len(self.ax.lines[0].get_ydata())
 | 
			
		||||
        expected = 1238
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected number of data points in line plot to be 1238.")
 | 
			
		||||
@ -83,10 +83,10 @@ class BoxPlotTestCase(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
 | 
			
		||||
        actual = self.ax2.get_xlabel()
 | 
			
		||||
        expected = "Month"
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 1 xlabel to be 'Month'")
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 2 xlabel to be 'Month'")
 | 
			
		||||
        actual = self.ax2.get_ylabel()
 | 
			
		||||
        expected = "Page Views"
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 2 ylabel to be 'Page Views'")
 | 
			
		||||
        actual = []
 | 
			
		||||
        for label in self.ax1.get_xaxis().get_majorticklabels():
 | 
			
		||||
            actual.append(label.get_text())
 | 
			
		||||
@ -109,9 +109,9 @@ class BoxPlotTestCase(unittest.TestCase):
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
 | 
			
		||||
        actual = self.ax2.get_title()
 | 
			
		||||
        expected = "Month-wise Box Plot (Seasonality)"
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Month-wise Box Plot (Seasonality)'")
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected box plot 2 title to be 'Month-wise Box Plot (Seasonality)'")
 | 
			
		||||
 | 
			
		||||
    def test_box_plot_number_of_boxs(self):
 | 
			
		||||
    def test_box_plot_number_of_boxes(self):
 | 
			
		||||
        actual = len(self.ax1.lines) / 6 # Every box has 6 lines
 | 
			
		||||
        expected = 4
 | 
			
		||||
        self.assertEqual(actual, expected, "Expected four boxes in box plot 1")
 | 
			
		||||
 | 
			
		||||
@ -1,53 +1,112 @@
 | 
			
		||||
import calendar
 | 
			
		||||
 | 
			
		||||
import matplotlib.pyplot as plt
 | 
			
		||||
import pandas as pd
 | 
			
		||||
import seaborn as sns
 | 
			
		||||
from pandas.plotting import register_matplotlib_converters
 | 
			
		||||
 | 
			
		||||
register_matplotlib_converters()
 | 
			
		||||
 | 
			
		||||
# Import data (Make sure to parse dates. Consider setting index column to 'date'.)
 | 
			
		||||
df = None
 | 
			
		||||
df = pd.read_csv("fcc-forum-pageviews.csv")
 | 
			
		||||
 | 
			
		||||
# Clean data
 | 
			
		||||
df = None
 | 
			
		||||
df = df[(
 | 
			
		||||
    df['value'] >= df['value'].quantile(0.025))
 | 
			
		||||
    & (df['value'] <= df['value'].quantile(0.975))]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def draw_line_plot():
 | 
			
		||||
    # Draw line plot
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    fig, axs = plt.subplots(ncols=1, figsize=(20, 10))
 | 
			
		||||
    sns.lineplot(df["value"], estimator=None).set(
 | 
			
		||||
        xlabel="Date",
 | 
			
		||||
        ylabel="Page Views",
 | 
			
		||||
        xticklabels=df.index,
 | 
			
		||||
        title='Daily freeCodeCamp Forum Page Views 5/2016-12/2019')
 | 
			
		||||
 | 
			
		||||
    # Save image and return fig (don't change this part)
 | 
			
		||||
    fig.savefig('line_plot.png')
 | 
			
		||||
    return fig
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def draw_bar_plot():
 | 
			
		||||
    df = pd.read_csv(
 | 
			
		||||
        "fcc-forum-pageviews.csv", parse_dates=True, index_col=[0])
 | 
			
		||||
    df = df[
 | 
			
		||||
        (df['value'] >= df['value'].quantile(0.025))
 | 
			
		||||
        & (df['value'] <= df['value'].quantile(0.975))]
 | 
			
		||||
    # Copy and modify data for monthly bar plot
 | 
			
		||||
    df_bar = None
 | 
			
		||||
    df['year'] = df.index.year
 | 
			
		||||
    df['month'] = df.index.month
 | 
			
		||||
 | 
			
		||||
    # Draw bar plot
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    df.groupby(['year', 'month']).mean()
 | 
			
		||||
    df_bar = df.groupby([df.index.year, df.index.month]).mean()
 | 
			
		||||
    df_bar['month'] = df_bar['month'].apply(
 | 
			
		||||
        lambda x: calendar.month_name[int(x)])
 | 
			
		||||
    df_bar = df_bar.astype({'year': 'int'})
 | 
			
		||||
    fig, axs = plt.subplots(ncols=1, figsize=(20, 10))
 | 
			
		||||
    sns.barplot(
 | 
			
		||||
        data=df_bar,
 | 
			
		||||
        x='year',
 | 
			
		||||
        y='value',
 | 
			
		||||
        hue='month',
 | 
			
		||||
        hue_order=[
 | 
			
		||||
                 'January',
 | 
			
		||||
                 'February',
 | 
			
		||||
                 'March',
 | 
			
		||||
                 'April',
 | 
			
		||||
                 'May',
 | 
			
		||||
                 'June',
 | 
			
		||||
                 'July',
 | 
			
		||||
                 'August',
 | 
			
		||||
                 'September',
 | 
			
		||||
                 'October',
 | 
			
		||||
                 'November',
 | 
			
		||||
                 'December']).set(
 | 
			
		||||
        xlabel='Years', ylabel='Average Page Views')
 | 
			
		||||
 | 
			
		||||
    # Save image and return fig (don't change this part)
 | 
			
		||||
    fig.savefig('bar_plot.png')
 | 
			
		||||
    return fig
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def draw_box_plot():
 | 
			
		||||
    # Prepare data for box plots (this part is done!)
 | 
			
		||||
    df = pd.read_csv("fcc-forum-pageviews.csv", parse_dates=True, index_col=0)
 | 
			
		||||
    df = df[(df['value'] >= df['value'].quantile(0.025))
 | 
			
		||||
            & (df['value'] <= df['value'].quantile(0.975))]
 | 
			
		||||
    df_box = df.copy()
 | 
			
		||||
    df_box.reset_index(inplace=True)
 | 
			
		||||
    df_box['year'] = [d.year for d in df_box.date]
 | 
			
		||||
    df_box['month'] = [d.strftime('%b') for d in df_box.date]
 | 
			
		||||
 | 
			
		||||
    # Draw box plots (using Seaborn)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    fig, axs = plt.subplots(ncols=2, figsize=(24, 10))
 | 
			
		||||
    sns.boxplot(data=df_box, ax=axs[0], x="year", y="value").set(
 | 
			
		||||
        title="Year-wise Box Plot (Trend)", ylabel="Page Views", xlabel="Year")
 | 
			
		||||
    sns.boxplot(
 | 
			
		||||
        data=df_box,
 | 
			
		||||
        ax=axs[1],
 | 
			
		||||
        x="month",
 | 
			
		||||
        y="value",
 | 
			
		||||
        order=[
 | 
			
		||||
            "Jan",
 | 
			
		||||
            "Feb",
 | 
			
		||||
            "Mar",
 | 
			
		||||
            "Apr",
 | 
			
		||||
            "May",
 | 
			
		||||
            "Jun",
 | 
			
		||||
            "Jul",
 | 
			
		||||
            "Aug",
 | 
			
		||||
            "Sep",
 | 
			
		||||
            "Oct",
 | 
			
		||||
            "Nov",
 | 
			
		||||
            "Dec"]).set(
 | 
			
		||||
        title="Month-wise Box Plot (Seasonality)",
 | 
			
		||||
        ylabel="Page Views",
 | 
			
		||||
        xlabel="Month")
 | 
			
		||||
 | 
			
		||||
    # Save image and return fig (don't change this part)
 | 
			
		||||
    fig.savefig('box_plot.png')
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user