init
This commit is contained in:
		
						commit
						ecb71658a1
					
				
							
								
								
									
										45
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,45 @@ | |||||||
|  | ### Assignment | ||||||
|  | 
 | ||||||
|  | # Demographic Data Analyzer | ||||||
|  | 
 | ||||||
|  | In this challenge you must analyze demographic data using Pandas. You are given a dataset of demographic data that was extracted from the 1994 Census database. Here is a sample of what the data looks like: | ||||||
|  | 
 | ||||||
|  | |    |   age | workclass        |   fnlwgt | education   |   education-num | marital-status     | occupation        | relationship   | race   | sex    |   capital-gain |   capital-loss |   hours-per-week | native-country   | salary   | | ||||||
|  | |---:|------:|:-----------------|---------:|:------------|----------------:|:-------------------|:------------------|:---------------|:-------|:-------|---------------:|---------------:|-----------------:|:-----------------|:---------| | ||||||
|  | |  0 |    39 | State-gov        |    77516 | Bachelors   |              13 | Never-married      | Adm-clerical      | Not-in-family  | White  | Male   |           2174 |              0 |               40 | United-States    | <=50K    | | ||||||
|  | |  1 |    50 | Self-emp-not-inc |    83311 | Bachelors   |              13 | Married-civ-spouse | Exec-managerial   | Husband        | White  | Male   |              0 |              0 |               13 | United-States    | <=50K    | | ||||||
|  | |  2 |    38 | Private          |   215646 | HS-grad     |               9 | Divorced           | Handlers-cleaners | Not-in-family  | White  | Male   |              0 |              0 |               40 | United-States    | <=50K    | | ||||||
|  | |  3 |    53 | Private          |   234721 | 11th        |               7 | Married-civ-spouse | Handlers-cleaners | Husband        | Black  | Male   |              0 |              0 |               40 | United-States    | <=50K    | | ||||||
|  | |  4 |    28 | Private          |   338409 | Bachelors   |              13 | Married-civ-spouse | Prof-specialty    | Wife           | Black  | Female |              0 |              0 |               40 | Cuba             | <=50K    | | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | You must use Pandas to answer the following questions: | ||||||
|  | * How many people of each race are represented in this dataset? This should be a Pandas series with race names as the index labels. (`race` column) | ||||||
|  | * What is the average age of men? | ||||||
|  | * What is the percentage of people who have a Bachelor's degree? | ||||||
|  | * What percentage of people with advanced education (`Bachelors`, `Masters`, or `Doctorate`) make more than 50K? | ||||||
|  | * What percentage of people without advanced education make more than 50K? | ||||||
|  | * What is the minimum number of hours a person works per week? | ||||||
|  | * What percentage of the people who work the minimum number of hours per week have a salary of more than 50K? | ||||||
|  | * What country has the highest percentage of people that earn >50K and what is that percentage? | ||||||
|  | * Identify the most popular occupation for those who earn >50K in India.  | ||||||
|  | 
 | ||||||
|  | Use the starter code in the file `demographic_data_analyzer.py`. Update the code so all variables set to `None` are set to the appropriate calculation or code. Round all decimals to the nearest tenth. | ||||||
|  | 
 | ||||||
|  | Unit tests are written for you under `test_module.py`. | ||||||
|  | 
 | ||||||
|  | ### Development | ||||||
|  | 
 | ||||||
|  | For development, you can use `main.py` to test your functions. Click the "run" button and `main.py` will run. | ||||||
|  | 
 | ||||||
|  | ### Testing  | ||||||
|  | 
 | ||||||
|  | We imported the tests from `test_module.py` to `main.py` for your convenience. The tests will run automatically whenever you hit the "run" button. | ||||||
|  | 
 | ||||||
|  | ### Submitting | ||||||
|  | 
 | ||||||
|  | Copy your project's URL and submit it to freeCodeCamp. | ||||||
|  | 
 | ||||||
|  | ### Dataset Source | ||||||
|  | 
 | ||||||
|  | Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. | ||||||
							
								
								
									
										32563
									
								
								adult.data.csv
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32563
									
								
								adult.data.csv
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										69
									
								
								demographic_data_analyzer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								demographic_data_analyzer.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,69 @@ | |||||||
|  | import pandas as pd | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def calculate_demographic_data(print_data=True):
    """Compute summary statistics for the demographic (1994 Census) dataset.

    This is the starter template for the assignment: every result below is a
    ``None`` placeholder that is meant to be replaced with the matching
    Pandas calculation. The README asks for all decimals to be rounded to
    the nearest tenth.

    Args:
        print_data: When true, print every result to stdout before returning.

    Returns:
        dict: The results, keyed by 'race_count', 'average_age_men',
        'percentage_bachelors', 'higher_education_rich',
        'lower_education_rich', 'min_work_hours', 'rich_percentage',
        'highest_earning_country', 'highest_earning_country_percentage'
        and 'top_IN_occupation'.
    """
    # Read data from file
    # NOTE(review): presumably the `adult.data.csv` file shipped next to this
    # module -- confirm the path when filling this in.
    df = None

    # How many of each race are represented in this dataset? This should be a Pandas series with race names as the index labels.
    race_count = None

    # What is the average age of men?
    average_age_men = None

    # What is the percentage of people who have a Bachelor's degree?
    percentage_bachelors = None

    # What percentage of people with advanced education (`Bachelors`, `Masters`, or `Doctorate`) make more than 50K?
    # What percentage of people without advanced education make more than 50K?

    # with and without `Bachelors`, `Masters`, or `Doctorate`
    # (intermediate values: neither is printed or returned below)
    higher_education = None
    lower_education = None

    # percentage with salary >50K
    higher_education_rich = None
    lower_education_rich = None

    # What is the minimum number of hours a person works per week (hours-per-week feature)?
    min_work_hours = None

    # What percentage of the people who work the minimum number of hours per week have a salary of >50K?
    # (num_min_workers is an intermediate value: it is not printed or returned below)
    num_min_workers = None

    rich_percentage = None

    # What country has the highest percentage of people that earn >50K?
    highest_earning_country = None
    highest_earning_country_percentage = None

    # Identify the most popular occupation for those who earn >50K in India.
    top_IN_occupation = None

    # DO NOT MODIFY BELOW THIS LINE

    if print_data:
        print("Number of each race:\n", race_count)
        print("Average age of men:", average_age_men)
        print(f"Percentage with Bachelors degrees: {percentage_bachelors}%")
        print(f"Percentage with higher education that earn >50K: {higher_education_rich}%")
        print(f"Percentage without higher education that earn >50K: {lower_education_rich}%")
        print(f"Min work time: {min_work_hours} hours/week")
        print(f"Percentage of rich among those who work fewest hours: {rich_percentage}%")
        print("Country with highest percentage of rich:", highest_earning_country)
        print(f"Highest percentage of rich people in country: {highest_earning_country_percentage}%")
        print("Top occupations in India:", top_IN_occupation)

    return {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'higher_education_rich': higher_education_rich,
        'lower_education_rich': lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage':
        highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation
    }
							
								
								
									
										9
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								main.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,9 @@ | |||||||
# This entrypoint file is to be used in development. Start by reading README.md
import demographic_data_analyzer
from unittest import main

# Test your function by calling it here
demographic_data_analyzer.calculate_demographic_data()

# Run unit tests automatically
# exit=False keeps unittest from calling sys.exit() once the test run finishes
main(module='test_module', exit=False)
							
								
								
									
										58
									
								
								poetry.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								poetry.lock
									
									
									
										generated
									
									
									
										Normal file
									
								
							| @ -0,0 +1,58 @@ | |||||||
|  | [[package]] | ||||||
|  | category = "main" | ||||||
|  | description = "NumPy is the fundamental package for array computing with Python." | ||||||
|  | name = "numpy" | ||||||
|  | optional = false | ||||||
|  | python-versions = ">=3.5" | ||||||
|  | version = "1.18.5" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | category = "main" | ||||||
|  | description = "Powerful data structures for data analysis, time series, and statistics" | ||||||
|  | name = "pandas" | ||||||
|  | optional = false | ||||||
|  | python-versions = ">=3.6.1" | ||||||
|  | version = "1.0.4" | ||||||
|  | 
 | ||||||
|  | [package.dependencies] | ||||||
|  | numpy = ">=1.13.3" | ||||||
|  | python-dateutil = ">=2.6.1" | ||||||
|  | pytz = ">=2017.2" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | category = "main" | ||||||
|  | description = "Extensions to the standard Python datetime module" | ||||||
|  | name = "python-dateutil" | ||||||
|  | optional = false | ||||||
|  | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" | ||||||
|  | version = "2.8.1" | ||||||
|  | 
 | ||||||
|  | [package.dependencies] | ||||||
|  | six = ">=1.5" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | category = "main" | ||||||
|  | description = "World timezone definitions, modern and historical" | ||||||
|  | name = "pytz" | ||||||
|  | optional = false | ||||||
|  | python-versions = "*" | ||||||
|  | version = "2020.1" | ||||||
|  | 
 | ||||||
|  | [[package]] | ||||||
|  | category = "main" | ||||||
|  | description = "Python 2 and 3 compatibility utilities" | ||||||
|  | name = "six" | ||||||
|  | optional = false | ||||||
|  | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" | ||||||
|  | version = "1.15.0" | ||||||
|  | 
 | ||||||
|  | [metadata] | ||||||
|  | content-hash = "27114271cf207dff3920111c8aa89baba75353cc23851aded0a93b193dc24770" | ||||||
|  | python-versions = "^3.8" | ||||||
|  | 
 | ||||||
|  | [metadata.hashes] | ||||||
|  | numpy = ["0172304e7d8d40e9e49553901903dc5f5a49a703363ed756796f5808a06fc233", "34e96e9dae65c4839bd80012023aadd6ee2ccb73ce7fdf3074c62f301e63120b", "3676abe3d621fc467c4c1469ee11e395c82b2d6b5463a9454e37fe9da07cd0d7", "3dd6823d3e04b5f223e3e265b4a1eae15f104f4366edd409e5a5e413a98f911f", "4064f53d4cce69e9ac613256dc2162e56f20a4e2d2086b1956dd2fcf77b7fac5", "4674f7d27a6c1c52a4d1aa5f0881f1eff840d2206989bae6acb1c7668c02ebfb", "7d42ab8cedd175b5ebcb39b5208b25ba104842489ed59fbb29356f671ac93583", "965df25449305092b23d5145b9bdaeb0149b6e41a77a7d728b1644b3c99277c1", "9c9d6531bc1886454f44aa8f809268bc481295cf9740827254f53c30104f074a", "a78e438db8ec26d5d9d0e584b27ef25c7afa5a182d1bf4d05e313d2d6d515271", "a7acefddf994af1aeba05bbbafe4ba983a187079f125146dc5859e6d817df824", "a87f59508c2b7ceb8631c20630118cc546f1f815e034193dc72390db038a5cb3", "ac792b385d81151bae2a5a8adb2b88261ceb4976dbfaaad9ce3a200e036753dc", "b03b2c0badeb606d1232e5f78852c102c0a7989d3a534b3129e7856a52f3d161", "b39321f1a74d1f9183bf1638a745b4fd6fe80efbb1f6b32b932a588b4bc7695f", "cae14a01a159b1ed91a324722d746523ec757357260c6804d11d6147a9e53e3f", "cd49930af1d1e49a812d987c2620ee63965b619257bd76eaaa95870ca08837cf", "e15b382603c58f24265c9c931c9a45eebf44fe2e6b4eaedbb0d025ab3255228b", "e91d31b34fc7c2c8f756b4e902f901f856ae53a93399368d9a0dc7be17ed2ca0", "ef627986941b5edd1ed74ba89ca43196ed197f1a206a3f18cc9faf2fb84fd675", "f718a7949d1c4f622ff548c572e0c03440b49b9531ff00e4ed5738b459f011e8"] | ||||||
|  | pandas = ["034185bb615dc96d08fa13aacba8862949db19d5e7804d6ee242d086f07bcc46", "0c9b7f1933e3226cc16129cf2093338d63ace5c85db7c9588e3e1ac5c1937ad5", "1f6fcf0404626ca0475715da045a878c7062ed39bc859afc4ccf0ba0a586a0aa", "1fc963ba33c299973e92d45466e576d11f28611f3549469aec4a35658ef9f4cc", "29b4cfee5df2bc885607b8f016e901e63df7ffc8f00209000471778f46cc6678", "2a8b6c28607e3f3c344fe3e9b3cd76d2bf9f59bc8c0f2e582e3728b80e1786dc", "2bc2ff52091a6ac481cc75d514f06227dc1b10887df1eb72d535475e7b825e31", "415e4d52fcfd68c3d8f1851cef4d947399232741cc994c8f6aa5e6a9f2e4b1d8", "519678882fd0587410ece91e3ff7f73ad6ded60f6fcb8aa7bcc85c1dc20ecac6", "51e0abe6e9f5096d246232b461649b0aa627f46de8f6344597ca908f2240cbaa", "698e26372dba93f3aeb09cd7da2bb6dd6ade248338cfe423792c07116297f8f4", "83af85c8e539a7876d23b78433d90f6a0e8aa913e37320785cf3888c946ee874", "982cda36d1773076a415ec62766b3c0a21cdbae84525135bdb8f460c489bb5dd", "a647e44ba1b3344ebc5991c8aafeb7cca2b930010923657a273b41d86ae225c4", "b35d625282baa7b51e82e52622c300a1ca9f786711b2af7cbe64f1e6831f4126", "bab51855f8b318ef39c2af2c11095f45a10b74cbab4e3c8199efcc5af314c648"] | ||||||
|  | python-dateutil = ["73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", "75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"] | ||||||
|  | pytz = ["a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed", "c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048"] | ||||||
|  | six = ["30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", "8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"] | ||||||
							
								
								
									
										15
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								pyproject.toml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,15 @@ | |||||||
|  | [tool.poetry] | ||||||
|  | name = "fcc-demographic-data-analyzer" | ||||||
|  | version = "0.1.0" | ||||||
|  | description = "" | ||||||
|  | authors = ["Your Name <you@example.com>"] | ||||||
|  | 
 | ||||||
|  | [tool.poetry.dependencies] | ||||||
|  | python = "^3.8" | ||||||
|  | pandas = "^1.0" | ||||||
|  | 
 | ||||||
|  | [tool.poetry.dev-dependencies] | ||||||
|  | 
 | ||||||
|  | [build-system] | ||||||
|  | requires = ["poetry>=0.12"] | ||||||
|  | build-backend = "poetry.masonry.api" | ||||||
							
								
								
									
										59
									
								
								test_module.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								test_module.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,59 @@ | |||||||
|  | import unittest | ||||||
|  | import demographic_data_analyzer | ||||||
|  | 
 | ||||||
class DemographicAnalyzerTestCase(unittest.TestCase):
    """Check the values returned by calculate_demographic_data().

    Failure text is always passed through the ``msg=`` keyword. The third
    positional parameter of ``assertAlmostEqual`` is ``places``, so a
    positional message makes a failing comparison raise ``TypeError`` instead
    of reporting the intended message. Lists and strings are compared with
    ``assertEqual`` because ``assertAlmostEqual`` subtracts its operands.
    """

    def setUp(self):
        # Recompute the results before every test; printing is switched off
        # so the report output is not mixed into the test output.
        self.data = demographic_data_analyzer.calculate_demographic_data(print_data=False)

    def test_race_count(self):
        # The series is compared as a plain list, so order matters.
        actual = self.data['race_count'].tolist()
        expected = [27816, 3124, 1039, 311, 271]
        self.assertEqual(actual, expected, msg="Expected race count values to be [27816, 3124, 1039, 311, 271]")

    def test_average_age_men(self):
        actual = self.data['average_age_men']
        expected = 39.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for average age of men.")

    def test_percentage_bachelors(self):
        actual = self.data['percentage_bachelors']
        expected = 16.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage with Bachelors degrees.")

    def test_higher_education_rich(self):
        actual = self.data['higher_education_rich']
        expected = 46.5
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage with higher education that earn >50K.")

    def test_lower_education_rich(self):
        actual = self.data['lower_education_rich']
        expected = 17.4
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage without higher education that earn >50K.")

    def test_min_work_hours(self):
        actual = self.data['min_work_hours']
        expected = 1
        self.assertAlmostEqual(actual, expected, msg="Expected different value for minimum work hours.")

    def test_rich_percentage(self):
        actual = self.data['rich_percentage']
        expected = 10
        self.assertAlmostEqual(actual, expected, msg="Expected different value for percentage of rich among those who work fewest hours.")

    def test_highest_earning_country(self):
        # A country name is a string: exact equality is the only meaningful check.
        actual = self.data['highest_earning_country']
        expected = 'Iran'
        self.assertEqual(actual, expected, msg="Expected different value for highest earning country.")

    def test_highest_earning_country_percentage(self):
        actual = self.data['highest_earning_country_percentage']
        expected = 41.9
        self.assertAlmostEqual(actual, expected, msg="Expected different value for heighest earning country percentage.")

    def test_top_IN_occupation(self):
        actual = self.data['top_IN_occupation']
        expected = 'Prof-specialty'
        self.assertEqual(actual, expected, msg="Expected different value for top occupations in India.")


if __name__ == "__main__":
    unittest.main()
		Loading…
	
		Reference in New Issue
	
	Block a user
	 moT01
						moT01