main
moT01 4 years ago
commit 88b7d3d03a

@ -0,0 +1,24 @@
### Assignment
For this project you will visualize time series data using a line chart, bar chart, and box plots. You will use Pandas, Matplotlib, and Seaborn to visualize a dataset containing the number of page views each day on the freeCodeCamp.org forum from 2016-05-09 to 2019-12-03. The data visualizations will help you understand the patterns in visits and identify yearly and monthly growth.
Use the data to complete the following tasks:
* Use Pandas to import the data from "fcc-forum-pageviews.csv". Set the index to the "date" column.
* Clean the data by filtering out days when the page views were in the top 2.5% of the dataset or bottom 2.5% of the dataset.
* Create a `draw_line_plot` function that uses Matplotlib to draw a line chart similar to "examples/Figure_1.png". The title should be "Daily freeCodeCamp Forum Page Views 5/2016-12/2019". The label on the x axis should be "Date" and the label on the y axis should be "Page Views".
* Create a `draw_bar_plot` function that draws a bar chart similar to "examples/Figure_2.png". It should show average daily page views for each month grouped by year. The legend should show month labels and have a title of "Months". On the chart, the label on the x axis should be "Years" and the label on the y axis should be "Average Page Views".
* Create a `draw_box_plot` function that uses Seaborn to draw two adjacent box plots similar to "examples/Figure_3.png". These box plots should show how the values are distributed within a given year or month and how it compares over time. The title of the first chart should be "Year-wise Box Plot (Trend)" and the title of the second chart should be "Month-wise Box Plot (Seasonality)". Make sure the month labels on bottom start at "Jan" and the x and y axes are labeled correctly.
For each chart, make sure to use a copy of the data frame. Unit tests are written for you under `test_module.py`.
### Development
For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run.
### Testing
We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button.
### Submitting
Copy your project's URL and submit it to freeCodeCamp.

Binary file not shown.

After

Width:  |  Height:  |  Size: 270 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

File diff suppressed because it is too large Load Diff

@ -0,0 +1,11 @@
# This entrypoint file is to be used in development. Start by reading README.md
import time_series_visualizer
from unittest import main

# Test your functions by calling them here. Each call runs the corresponding
# drawing function once, before the unit tests start.
time_series_visualizer.draw_line_plot()
time_series_visualizer.draw_bar_plot()
time_series_visualizer.draw_box_plot()

# Run unit tests automatically.
# exit=False stops unittest from calling sys.exit() once the run finishes.
main(module='test_module', exit=False)

131
poetry.lock generated

@ -0,0 +1,131 @@
[[package]]
category = "main"
description = "Composable style cycles"
name = "cycler"
optional = false
python-versions = "*"
version = "0.10.0"
[package.dependencies]
six = "*"
[[package]]
category = "main"
description = "A fast implementation of the Cassowary constraint solver"
name = "kiwisolver"
optional = false
python-versions = ">=3.6"
version = "1.2.0"
[[package]]
category = "main"
description = "Python plotting package"
name = "matplotlib"
optional = false
python-versions = ">=3.6"
version = "3.2.1"
[package.dependencies]
cycler = ">=0.10"
kiwisolver = ">=1.0.1"
numpy = ">=1.11"
pyparsing = ">=2.0.1,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6"
python-dateutil = ">=2.1"
[[package]]
category = "main"
description = "NumPy is the fundamental package for array computing with Python."
name = "numpy"
optional = false
python-versions = ">=3.5"
version = "1.18.5"
[[package]]
category = "main"
description = "Powerful data structures for data analysis, time series, and statistics"
name = "pandas"
optional = false
python-versions = ">=3.6.1"
version = "1.0.4"
[package.dependencies]
numpy = ">=1.13.3"
python-dateutil = ">=2.6.1"
pytz = ">=2017.2"
[[package]]
category = "main"
description = "Python parsing module"
name = "pyparsing"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
version = "2.4.7"
[[package]]
category = "main"
description = "Extensions to the standard Python datetime module"
name = "python-dateutil"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
version = "2.8.1"
[package.dependencies]
six = ">=1.5"
[[package]]
category = "main"
description = "World timezone definitions, modern and historical"
name = "pytz"
optional = false
python-versions = "*"
version = "2020.1"
[[package]]
category = "main"
description = "SciPy: Scientific Library for Python"
name = "scipy"
optional = false
python-versions = ">=3.5"
version = "1.4.1"
[package.dependencies]
numpy = ">=1.13.3"
[[package]]
category = "main"
description = "seaborn: statistical data visualization"
name = "seaborn"
optional = false
python-versions = ">=3.6"
version = "0.10.1"
[package.dependencies]
matplotlib = ">=2.1.2"
numpy = ">=1.13.3"
pandas = ">=0.22.0"
scipy = ">=1.0.1"
[[package]]
category = "main"
description = "Python 2 and 3 compatibility utilities"
name = "six"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
version = "1.15.0"
[metadata]
content-hash = "4e8082311e9378f77d7a1accb8cd080faf04d14d5f7beba06a8e2f950698f9f3"
python-versions = "^3.7"
[metadata.hashes]
cycler = ["1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", "cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"]
kiwisolver = ["03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c", "18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd", "247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f", "443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74", "4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1", "556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c", "603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec", "60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b", "7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7", "c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113", "c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec", "d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf", "d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e", "e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657", "efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8", "fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2"]
matplotlib = ["2466d4dddeb0f5666fd1e6736cc5287a4f9f7ae6c1a9e0779deff798b28e1d35", "282b3fc8023c4365bad924d1bb442ddc565c2d1635f210b700722776da466ca3", "4bb50ee4755271a2017b070984bcb788d483a8ce3132fab68393d1555b62d4ba", "56d3147714da5c7ac4bc452d041e70e0e0b07c763f604110bd4e2527f320b86d", "7a9baefad265907c6f0b037c8c35a10cf437f7708c27415a5513cf09ac6d6ddd", "aae7d107dc37b4bb72dcc45f70394e6df2e5e92ac4079761aacd0e2ad1d3b1f7", "af14e77829c5b5d5be11858d042d6f2459878f8e296228c7ea13ec1fd308eb68", "c1cf735970b7cd424502719b44288b21089863aaaab099f55e0283a721aaf781", "ce378047902b7a05546b6485b14df77b2ff207a0054e60c10b5680132090c8ee", "d35891a86a4388b6965c2d527b9a9f9e657d9e110b0575ca8a24ba0d4e34b8fc", "e06304686209331f99640642dee08781a9d55c6e32abb45ed54f021f46ccae47", "e20ba7fb37d4647ac38f3c6d8672dd8b62451ee16173a0711b37ba0ce42bf37d", "f4412241e32d0f8d3713b68d3ca6430190a5e8a7c070f1c07d7833d8c5264398", "ffe2f9cdcea1086fc414e82f42271ecf1976700b8edd16ca9d376189c6d93aee"]
numpy = ["0172304e7d8d40e9e49553901903dc5f5a49a703363ed756796f5808a06fc233", "34e96e9dae65c4839bd80012023aadd6ee2ccb73ce7fdf3074c62f301e63120b", "3676abe3d621fc467c4c1469ee11e395c82b2d6b5463a9454e37fe9da07cd0d7", "3dd6823d3e04b5f223e3e265b4a1eae15f104f4366edd409e5a5e413a98f911f", "4064f53d4cce69e9ac613256dc2162e56f20a4e2d2086b1956dd2fcf77b7fac5", "4674f7d27a6c1c52a4d1aa5f0881f1eff840d2206989bae6acb1c7668c02ebfb", "7d42ab8cedd175b5ebcb39b5208b25ba104842489ed59fbb29356f671ac93583", "965df25449305092b23d5145b9bdaeb0149b6e41a77a7d728b1644b3c99277c1", "9c9d6531bc1886454f44aa8f809268bc481295cf9740827254f53c30104f074a", "a78e438db8ec26d5d9d0e584b27ef25c7afa5a182d1bf4d05e313d2d6d515271", "a7acefddf994af1aeba05bbbafe4ba983a187079f125146dc5859e6d817df824", "a87f59508c2b7ceb8631c20630118cc546f1f815e034193dc72390db038a5cb3", "ac792b385d81151bae2a5a8adb2b88261ceb4976dbfaaad9ce3a200e036753dc", "b03b2c0badeb606d1232e5f78852c102c0a7989d3a534b3129e7856a52f3d161", "b39321f1a74d1f9183bf1638a745b4fd6fe80efbb1f6b32b932a588b4bc7695f", "cae14a01a159b1ed91a324722d746523ec757357260c6804d11d6147a9e53e3f", "cd49930af1d1e49a812d987c2620ee63965b619257bd76eaaa95870ca08837cf", "e15b382603c58f24265c9c931c9a45eebf44fe2e6b4eaedbb0d025ab3255228b", "e91d31b34fc7c2c8f756b4e902f901f856ae53a93399368d9a0dc7be17ed2ca0", "ef627986941b5edd1ed74ba89ca43196ed197f1a206a3f18cc9faf2fb84fd675", "f718a7949d1c4f622ff548c572e0c03440b49b9531ff00e4ed5738b459f011e8"]
pandas = ["034185bb615dc96d08fa13aacba8862949db19d5e7804d6ee242d086f07bcc46", "0c9b7f1933e3226cc16129cf2093338d63ace5c85db7c9588e3e1ac5c1937ad5", "1f6fcf0404626ca0475715da045a878c7062ed39bc859afc4ccf0ba0a586a0aa", "1fc963ba33c299973e92d45466e576d11f28611f3549469aec4a35658ef9f4cc", "29b4cfee5df2bc885607b8f016e901e63df7ffc8f00209000471778f46cc6678", "2a8b6c28607e3f3c344fe3e9b3cd76d2bf9f59bc8c0f2e582e3728b80e1786dc", "2bc2ff52091a6ac481cc75d514f06227dc1b10887df1eb72d535475e7b825e31", "415e4d52fcfd68c3d8f1851cef4d947399232741cc994c8f6aa5e6a9f2e4b1d8", "519678882fd0587410ece91e3ff7f73ad6ded60f6fcb8aa7bcc85c1dc20ecac6", "51e0abe6e9f5096d246232b461649b0aa627f46de8f6344597ca908f2240cbaa", "698e26372dba93f3aeb09cd7da2bb6dd6ade248338cfe423792c07116297f8f4", "83af85c8e539a7876d23b78433d90f6a0e8aa913e37320785cf3888c946ee874", "982cda36d1773076a415ec62766b3c0a21cdbae84525135bdb8f460c489bb5dd", "a647e44ba1b3344ebc5991c8aafeb7cca2b930010923657a273b41d86ae225c4", "b35d625282baa7b51e82e52622c300a1ca9f786711b2af7cbe64f1e6831f4126", "bab51855f8b318ef39c2af2c11095f45a10b74cbab4e3c8199efcc5af314c648"]
pyparsing = ["c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", "ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"]
python-dateutil = ["73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", "75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"]
pytz = ["a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", "c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048"]
scipy = ["00af72998a46c25bdb5824d2b729e7dabec0c765f9deb0b504f928591f5ff9d4", "0902a620a381f101e184a958459b36d3ee50f5effd186db76e131cbefcbb96f7", "1e3190466d669d658233e8a583b854f6386dd62d655539b77b3fa25bfb2abb70", "2cce3f9847a1a51019e8c5b47620da93950e58ebc611f13e0d11f4980ca5fecb", "3092857f36b690a321a662fe5496cb816a7f4eecd875e1d36793d92d3f884073", "386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa", "71eb180f22c49066f25d6df16f8709f215723317cc951d99e54dc88020ea57be", "770254a280d741dd3436919d47e35712fb081a6ff8bafc0f319382b954b77802", "787cc50cab3020a865640aba3485e9fbd161d4d3b0d03a967df1a2881320512d", "8a07760d5c7f3a92e440ad3aedcc98891e915ce857664282ae3c0220f3301eb6", "8d3bc3993b8e4be7eade6dcc6fd59a412d96d3a33fa42b0fa45dc9e24495ede9", "9508a7c628a165c2c835f2497837bf6ac80eb25291055f56c129df3c943cbaf8", "a144811318853a23d32a07bc7fd5561ff0cac5da643d96ed94a4ffe967d89672", "a1aae70d52d0b074d8121333bc807a485f9f1e6a69742010b33780df2e60cfe0", "a2d6df9eb074af7f08866598e4ef068a2b310d98f87dc23bd1b90ec7bdcec802", "bb517872058a1f087c4528e7429b4a44533a902644987e7b2fe35ecc223bc408", "c5cac0c0387272ee0e789e94a570ac51deb01c796b37fb2aad1fb13f85e2f97d", "cc971a82ea1170e677443108703a2ec9ff0f70752258d0e9f5433d00dda01f59", "dba8306f6da99e37ea08c08fef6e274b5bf8567bb094d1dbe86a20e532aca088", "dc60bb302f48acf6da8ca4444cfa17d52c63c5415302a9ee77b3b21618090521", "dee1bbf3a6c8f73b6b218cb28eed8dd13347ea2f87d572ce19b289d6fd3fbc59"]
seaborn = ["2d1a0c9d6bd1bc3cadb0364b8f06540f51322a670cf8438d0fde1c1c7317adc0", "c901ce494541fb4714cfa7db79d0232dc3f4c4dfd3f273bacf17816084df5b53"]
six = ["30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", "8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"]

@ -0,0 +1,9 @@
[tool]
[tool.poetry]
authors = ["Your Name <you@example.com>"]
name = "root"
version = "0.0.0"
[tool.poetry.dependencies]
pandas = "*"
python = "^3.7"
seaborn = "*"

@ -0,0 +1,123 @@
import unittest
import time_series_visualizer
import matplotlib as mpl
class DataCleaningTestCase(unittest.TestCase):
    """Checks the size of the module-level DataFrame after cleaning."""

    def test_data_cleaning(self):
        """The cleaned ``df`` must report a count of exactly 1238."""
        expected = 1238
        cleaned_frame = time_series_visualizer.df
        actual = int(cleaned_frame.count())
        self.assertEqual(actual, expected, "Expected DataFrame count after cleaning to be 1238.")
class LinePlotTestCase(unittest.TestCase):
    """Checks the figure returned by ``draw_line_plot``."""

    def setUp(self):
        # Draw a fresh figure for every test; all checks look at its first axes.
        figure = time_series_visualizer.draw_line_plot()
        self.fig = figure
        self.ax = figure.axes[0]

    def test_line_plot_title(self):
        """The chart title must match the assignment text exactly."""
        self.assertEqual(
            self.ax.get_title(),
            "Daily freeCodeCamp Forum Page Views 5/2016-12/2019",
            "Expected line plot title to be 'Daily freeCodeCamp Forum Page Views 5/2016-12/2019'",
        )

    def test_line_plot_labels(self):
        """Both axis labels must be set to the required text."""
        x_label = self.ax.get_xlabel()
        self.assertEqual(x_label, "Date", "Expected line plot xlabel to be 'Date'")

        y_label = self.ax.get_ylabel()
        self.assertEqual(y_label, "Page Views", "Expected line plot ylabel to be 'Page Views'")

    def test_line_plot_data_quatity(self):
        """The first plotted line must carry one y value per cleaned row."""
        first_line = self.ax.lines[0]
        point_count = len(first_line.get_ydata())
        self.assertEqual(point_count, 1238, "Expected number of data points in line plot to be 1238.")
class BarPlotTestCase(unittest.TestCase):
    """Checks the figure returned by ``draw_bar_plot``."""

    def setUp(self):
        # Draw a fresh figure for every test; all checks look at its first axes.
        figure = time_series_visualizer.draw_bar_plot()
        self.fig = figure
        self.ax = figure.axes[0]

    def test_bar_plot_legend_labels(self):
        """Legend entries must be the twelve full month names, in order."""
        legend_texts = self.ax.get_legend().get_texts()
        actual = [entry.get_text() for entry in legend_texts]
        expected = [
            'January', 'February', 'March', 'April', 'May', 'June',
            'July', 'August', 'September', 'October', 'November', 'December',
        ]
        self.assertEqual(actual, expected, "Expected bar plot legend labels to be months of the year.")

    def test_bar_plot_labels(self):
        """Axis labels and x tick labels must match the required text."""
        x_label = self.ax.get_xlabel()
        self.assertEqual(x_label, "Years", "Expected bar plot xlabel to be 'Years'")

        y_label = self.ax.get_ylabel()
        self.assertEqual(y_label, "Average Page Views", "Expected bar plot ylabel to be 'Average Page Views'")

        tick_texts = [tick.get_text() for tick in self.ax.get_xaxis().get_majorticklabels()]
        self.assertEqual(
            tick_texts,
            ['2016', '2017', '2018', '2019'],
            "Expected bar plot secondary labels to be '2016', '2017', '2018', '2019'",
        )

    def test_bar_plot_number_of_bars(self):
        """The axes must contain exactly 49 Rectangle children."""
        # Counts every Rectangle artist attached to the axes, not only bars.
        rectangle_count = sum(
            1 for child in self.ax.get_children()
            if isinstance(child, mpl.patches.Rectangle)
        )
        self.assertEqual(rectangle_count, 49, "Expected a different number of bars in bar chart.")
class BoxPlotTestCase(unittest.TestCase):
    """Checks the figure returned by ``draw_box_plot``.

    The figure is expected to hold two axes: index 0 is the year-wise plot
    and index 1 is the month-wise plot. Failure messages name the plot and
    axis actually being checked so a failing run points at the right chart.
    """

    def setUp(self):
        # Draw a fresh figure for every test and keep handles to both axes.
        self.fig = time_series_visualizer.draw_box_plot()
        self.ax1 = self.fig.axes[0]
        self.ax2 = self.fig.axes[1]

    def test_box_plot_number(self):
        """The figure must contain exactly two axes."""
        actual = len(self.fig.get_axes())
        expected = 2
        self.assertEqual(actual, expected, "Expected two box plots in figure.")

    def test_box_plot_labels(self):
        """Axis labels and tick labels of both plots must match the spec."""
        actual = self.ax1.get_xlabel()
        expected = "Year"
        self.assertEqual(actual, expected, "Expected box plot 1 xlabel to be 'Year'")
        actual = self.ax1.get_ylabel()
        expected = "Page Views"
        self.assertEqual(actual, expected, "Expected box plot 1 ylabel to be 'Page Views'")

        # The checks below are for the second (month-wise) plot.
        actual = self.ax2.get_xlabel()
        expected = "Month"
        self.assertEqual(actual, expected, "Expected box plot 2 xlabel to be 'Month'")
        actual = self.ax2.get_ylabel()
        expected = "Page Views"
        self.assertEqual(actual, expected, "Expected box plot 2 ylabel to be 'Page Views'")

        actual = [label.get_text() for label in self.ax1.get_xaxis().get_majorticklabels()]
        expected = ['2016', '2017', '2018', '2019']
        self.assertEqual(actual, expected, "Expected box plot 1 secondary labels to be '2016', '2017', '2018', '2019'")

        actual = [label.get_text() for label in self.ax2.get_xaxis().get_majorticklabels()]
        expected = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        self.assertEqual(actual, expected, "Expected box plot 2 secondary labels to be 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'")

        # This check reads the y axis, so the message says so explicitly.
        actual = [label.get_text() for label in self.ax1.get_yaxis().get_majorticklabels()]
        expected = ['0', '20000', '40000', '60000', '80000', '100000', '120000', '140000', '160000', '180000', '200000']
        self.assertEqual(actual, expected, "Expected box plot 1 y-axis tick labels to be '0', '20000', '40000', '60000', '80000', '100000', '120000', '140000', '160000', '180000', '200000'")

    def test_box_plot_titles(self):
        """Each plot must carry its required title."""
        actual = self.ax1.get_title()
        expected = "Year-wise Box Plot (Trend)"
        self.assertEqual(actual, expected, "Expected box plot 1 title to be 'Year-wise Box Plot (Trend)'")
        actual = self.ax2.get_title()
        expected = "Month-wise Box Plot (Seasonality)"
        self.assertEqual(actual, expected, "Expected box plot 2 title to be 'Month-wise Box Plot (Seasonality)'")

    def test_box_plot_number_of_boxs(self):
        """The first plot must show 4 boxes and the second 12."""
        actual = len(self.ax1.lines) / 6  # Every box has 6 lines
        expected = 4
        self.assertEqual(actual, expected, "Expected four boxes in box plot 1")
        actual = len(self.ax2.lines) / 6  # Every box has 6 lines
        expected = 12
        self.assertEqual(actual, expected, "Expected 12 boxes in box plot 2")
# Run the test cases above when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

@ -0,0 +1,54 @@
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pandas.plotting import register_matplotlib_converters
# Register pandas' date/time converters with Matplotlib.
register_matplotlib_converters()

# Import data (Make sure to parse dates. Consider setting index column to 'date'.)
# Placeholder: replace None with the loaded DataFrame.
df = None

# Clean data
# Placeholder: replace None with the cleaned DataFrame. The drawing functions
# and the tests read this module-level name.
df = None
def draw_line_plot():
    """Draw the line chart, save it to 'line_plot.png' and return the figure.

    This is a skeleton: nothing in the function assigns ``fig`` yet, so the
    plotting code added under "Draw line plot" must create it before the
    save/return lines below can run.
    """
    # Draw line plot

    # Save image and return fig (don't change this part)
    fig.savefig('line_plot.png')
    return fig
def draw_bar_plot():
    """Draw the bar chart, save it to 'bar_plot.png' and return the figure.

    This is a skeleton: ``df_bar`` is a placeholder and nothing in the
    function assigns ``fig`` yet, so the plotting code added under
    "Draw bar plot" must create it before the save/return lines can run.
    """
    # Copy and modify data for monthly bar plot
    # Placeholder: replace None with the prepared data.
    df_bar = None

    # Draw bar plot

    # Save image and return fig (don't change this part)
    fig.savefig('bar_plot.png')
    return fig
def draw_box_plot():
    """Draw the box plots, save them to 'box_plot.png' and return the figure.

    The data preparation is already written. The drawing code is still
    missing: nothing in the function assigns ``fig`` yet, so the code added
    under "Draw box plots" must create it before the save/return lines run.
    """
    # Prepare data for box plots (this part is done!)
    # Work on a copy so the module-level df is not modified.
    df_box = df.copy()
    # Turn the index back into a regular column; the lines below read it as
    # df_box.date, so the index is assumed to be named 'date'.
    df_box.reset_index(inplace=True)
    # Add a 'year' column taken from each date.
    df_box['year'] = [d.year for d in df_box.date]
    # Add a 'month' column holding each date formatted with '%b'.
    df_box['month'] = [d.strftime('%b') for d in df_box.date]

    # Draw box plots (using Seaborn)

    # Save image and return fig (don't change this part)
    fig.savefig('box_plot.png')
    return fig
Loading…
Cancel
Save