main
Manish 10 months ago
parent 576b86f2b9
commit 640f87cee8

File diff suppressed because one or more lines are too long

@ -0,0 +1,931 @@
```python
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import FileUpload
import pandas as pd
import io
import json
```
## Upload File
```python
# Show a file-picker widget that only offers .csv files; the selection is
# later available through `uploaded.value`.
uploaded = FileUpload(accept=".csv")
display(uploaded)
```
FileUpload(value=(), accept='.csv', description='Upload')
```python
# Load the first uploaded file into `df`; any further uploads are ignored.
for f in uploaded.value:
    content = f["content"]
    df = pd.read_csv(io.BytesIO(content))
    break
else:
    # Reached only when nothing was uploaded. Without this guard `df` would be
    # undefined (or silently stale from an earlier cell) on the next line.
    raise ValueError("No file uploaded yet: pick a CSV with the Upload button, then re-run this cell.")
df
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Year</th>
<th>CSIRO Adjusted Sea Level</th>
<th>Lower Error Bound</th>
<th>Upper Error Bound</th>
<th>NOAA Adjusted Sea Level</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>1880</td>
<td>0.000000</td>
<td>-0.952756</td>
<td>0.952756</td>
<td>NaN</td>
</tr>
<tr>
<th>1</th>
<td>1881</td>
<td>0.220472</td>
<td>-0.732283</td>
<td>1.173228</td>
<td>NaN</td>
</tr>
<tr>
<th>2</th>
<td>1882</td>
<td>-0.440945</td>
<td>-1.346457</td>
<td>0.464567</td>
<td>NaN</td>
</tr>
<tr>
<th>3</th>
<td>1883</td>
<td>-0.232283</td>
<td>-1.129921</td>
<td>0.665354</td>
<td>NaN</td>
</tr>
<tr>
<th>4</th>
<td>1884</td>
<td>0.590551</td>
<td>-0.283465</td>
<td>1.464567</td>
<td>NaN</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>129</th>
<td>2009</td>
<td>8.586614</td>
<td>8.311024</td>
<td>8.862205</td>
<td>8.046354</td>
</tr>
<tr>
<th>130</th>
<td>2010</td>
<td>8.901575</td>
<td>8.618110</td>
<td>9.185039</td>
<td>8.122973</td>
</tr>
<tr>
<th>131</th>
<td>2011</td>
<td>8.964567</td>
<td>8.661417</td>
<td>9.267717</td>
<td>8.053065</td>
</tr>
<tr>
<th>132</th>
<td>2012</td>
<td>9.326772</td>
<td>8.992126</td>
<td>9.661417</td>
<td>8.457058</td>
</tr>
<tr>
<th>133</th>
<td>2013</td>
<td>8.980315</td>
<td>8.622047</td>
<td>9.338583</td>
<td>8.546648</td>
</tr>
</tbody>
</table>
<p>134 rows × 5 columns</p>
</div>
## Provide CSV file URL as interactive input
```python
# Ask for the location of a CSV file and load it into `df`.
# Example input: https://github.com/jasperdebie/VisInfo/raw/master/us-state-capitals.csv
url = input("Provide direct url of CSV file: ")
df = pd.read_csv(url)
df
```
Provide direct url of CSV file: https://github.com/jasperdebie/VisInfo/raw/master/us-state-capitals.csv
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>name</th>
<th>description</th>
<th>latitude</th>
<th>longitude</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>Alabama</td>
<td>Montgomery</td>
<td>32.377716</td>
<td>-86.300568</td>
</tr>
<tr>
<th>1</th>
<td>Alaska</td>
<td>Juneau</td>
<td>58.301598</td>
<td>-134.420212</td>
</tr>
<tr>
<th>2</th>
<td>Arizona</td>
<td>Phoenix</td>
<td>33.448143</td>
<td>-112.096962</td>
</tr>
<tr>
<th>3</th>
<td>Arkansas</td>
<td>Little Rock</td>
<td>34.746613</td>
<td>-92.288986</td>
</tr>
<tr>
<th>4</th>
<td>California</td>
<td>Sacramento</td>
<td>38.576668</td>
<td>-121.493629</td>
</tr>
<tr>
<th>5</th>
<td>Colorado</td>
<td>Denver</td>
<td>39.739227</td>
<td>-104.984856</td>
</tr>
<tr>
<th>6</th>
<td>Connecticut</td>
<td>Hartford&lt;br&gt;</td>
<td>41.764046</td>
<td>-72.682198</td>
</tr>
<tr>
<th>7</th>
<td>Delaware</td>
<td>Dover</td>
<td>39.157307</td>
<td>-75.519722</td>
</tr>
<tr>
<th>8</th>
<td>Hawaii</td>
<td>Honolulu</td>
<td>21.307442</td>
<td>-157.857376</td>
</tr>
<tr>
<th>9</th>
<td>Florida</td>
<td>Tallahassee</td>
<td>30.438118</td>
<td>-84.281296</td>
</tr>
<tr>
<th>10</th>
<td>Georgia</td>
<td>Atlanta&lt;br&gt;</td>
<td>33.749027</td>
<td>-84.388229</td>
</tr>
<tr>
<th>11</th>
<td>Idaho</td>
<td>Boise</td>
<td>43.617775</td>
<td>-116.199722</td>
</tr>
<tr>
<th>12</th>
<td>Illinois</td>
<td>Springfield</td>
<td>39.798363</td>
<td>-89.654961</td>
</tr>
<tr>
<th>13</th>
<td>Indiana</td>
<td>Indianapolis</td>
<td>39.768623</td>
<td>-86.162643</td>
</tr>
<tr>
<th>14</th>
<td>Iowa</td>
<td>Des Moines</td>
<td>41.591087</td>
<td>-93.603729</td>
</tr>
<tr>
<th>15</th>
<td>Kansas</td>
<td>Topeka</td>
<td>39.048191</td>
<td>-95.677956</td>
</tr>
<tr>
<th>16</th>
<td>Kentucky</td>
<td>Frankfort</td>
<td>38.186722</td>
<td>-84.875374</td>
</tr>
<tr>
<th>17</th>
<td>Louisiana</td>
<td>Baton Rouge</td>
<td>30.457069</td>
<td>-91.187393</td>
</tr>
<tr>
<th>18</th>
<td>Maine</td>
<td>Augusta</td>
<td>44.307167</td>
<td>-69.781693</td>
</tr>
<tr>
<th>19</th>
<td>Maryland</td>
<td>Annapolis</td>
<td>38.978764</td>
<td>-76.490936</td>
</tr>
<tr>
<th>20</th>
<td>Massachusetts</td>
<td>Boston</td>
<td>42.358162</td>
<td>-71.063698</td>
</tr>
<tr>
<th>21</th>
<td>Michigan</td>
<td>Lansing</td>
<td>42.733635</td>
<td>-84.555328</td>
</tr>
<tr>
<th>22</th>
<td>Minnesota</td>
<td>St. Paul</td>
<td>44.955097</td>
<td>-93.102211</td>
</tr>
<tr>
<th>23</th>
<td>Mississippi</td>
<td>Jackson</td>
<td>32.303848</td>
<td>-90.182106</td>
</tr>
<tr>
<th>24</th>
<td>Missouri</td>
<td>Jefferson City</td>
<td>38.579201</td>
<td>-92.172935</td>
</tr>
<tr>
<th>25</th>
<td>Montana</td>
<td>Helena</td>
<td>46.585709</td>
<td>-112.018417</td>
</tr>
<tr>
<th>26</th>
<td>Nebraska</td>
<td>Lincoln</td>
<td>40.808075</td>
<td>-96.699654</td>
</tr>
<tr>
<th>27</th>
<td>Nevada</td>
<td>Carson City</td>
<td>39.163914</td>
<td>-119.766121</td>
</tr>
<tr>
<th>28</th>
<td>New Hampshire</td>
<td>Concord</td>
<td>43.206898</td>
<td>-71.537994</td>
</tr>
<tr>
<th>29</th>
<td>New Jersey</td>
<td>Trenton</td>
<td>40.220596</td>
<td>-74.769913</td>
</tr>
<tr>
<th>30</th>
<td>New Mexico</td>
<td>Santa Fe</td>
<td>35.682240</td>
<td>-105.939728</td>
</tr>
<tr>
<th>31</th>
<td>North Carolina</td>
<td>Raleigh</td>
<td>35.780430</td>
<td>-78.639099</td>
</tr>
<tr>
<th>32</th>
<td>North Dakota</td>
<td>Bismarck</td>
<td>46.820850</td>
<td>-100.783318</td>
</tr>
<tr>
<th>33</th>
<td>New York</td>
<td>Albany</td>
<td>42.652843</td>
<td>-73.757874</td>
</tr>
<tr>
<th>34</th>
<td>Ohio</td>
<td>Columbus</td>
<td>39.961346</td>
<td>-82.999069</td>
</tr>
<tr>
<th>35</th>
<td>Oklahoma</td>
<td>Oklahoma City</td>
<td>35.492207</td>
<td>-97.503342</td>
</tr>
<tr>
<th>36</th>
<td>Oregon</td>
<td>Salem</td>
<td>44.938461</td>
<td>-123.030403</td>
</tr>
<tr>
<th>37</th>
<td>Pennsylvania</td>
<td>Harrisburg</td>
<td>40.264378</td>
<td>-76.883598</td>
</tr>
<tr>
<th>38</th>
<td>Rhode Island</td>
<td>Providence</td>
<td>41.830914</td>
<td>-71.414963</td>
</tr>
<tr>
<th>39</th>
<td>South Carolina</td>
<td>Columbia</td>
<td>34.000343</td>
<td>-81.033211</td>
</tr>
<tr>
<th>40</th>
<td>South Dakota</td>
<td>Pierre</td>
<td>44.367031</td>
<td>-100.346405</td>
</tr>
<tr>
<th>41</th>
<td>Tennessee</td>
<td>Nashville</td>
<td>36.165810</td>
<td>-86.784241</td>
</tr>
<tr>
<th>42</th>
<td>Texas</td>
<td>Austin</td>
<td>30.274670</td>
<td>-97.740349</td>
</tr>
<tr>
<th>43</th>
<td>Utah</td>
<td>Salt Lake City</td>
<td>40.777477</td>
<td>-111.888237</td>
</tr>
<tr>
<th>44</th>
<td>Vermont</td>
<td>Montpelier</td>
<td>44.262436</td>
<td>-72.580536</td>
</tr>
<tr>
<th>45</th>
<td>Virginia</td>
<td>Richmond</td>
<td>37.538857</td>
<td>-77.433640</td>
</tr>
<tr>
<th>46</th>
<td>Washington</td>
<td>Olympia</td>
<td>47.035805</td>
<td>-122.905014</td>
</tr>
<tr>
<th>47</th>
<td>West Virginia</td>
<td>Charleston</td>
<td>38.336246</td>
<td>-81.612328</td>
</tr>
<tr>
<th>48</th>
<td>Wisconsin</td>
<td>Madison</td>
<td>43.074684</td>
<td>-89.384445</td>
</tr>
<tr>
<th>49</th>
<td>Wyoming</td>
<td>Cheyenne</td>
<td>41.140259</td>
<td>-104.820236</td>
</tr>
</tbody>
</table>
</div>
```python
# source: https://public.opendatasoft.com/explore/dataset/us-state-boundaries/table/
# Build `sb3`: one [x_values, y_values] pair per state outline, ready for plt.plot.
with open("us-state-boundaries.csv") as f:
    # Skip the header row and keep the second-to-last ';'-separated field of each line.
    sb = [line.split(';')[-2] for line in f.readlines()[1:]]  # state boundaries

# Keep only the text between the first "[[[" and the first "]]]" of each field
# and re-wrap it so it parses as a JSON list of [x, y] pairs.
sb = ['[[' + b[b.find("[[[") + 3: b.find("]]]") + 1].strip('[').strip(']') + ']]' for b in sb]

sb2 = []
for i, ring_text in enumerate(sb):
    try:
        sb2.append(json.loads(ring_text))
    except json.JSONDecodeError:
        # Skip rows whose geometry does not parse, reporting their index.
        # Only JSON errors are caught so Ctrl-C and real bugs still surface.
        print(i)

# Split every list of [x, y] pairs into parallel x and y lists.
sb3 = []
for ring in sb2:
    xs = [point[0] for point in ring]
    ys = [point[1] for point in ring]
    sb3.append([xs, ys])
print(sb3[0])
```
3
[[-64.843729999672, -64.8306170004155, -64.8166740000574, -64.8003289998509, -64.7850010003439, -64.7780049999629, -64.756923000007, -64.7438040000044, -64.7376500004207, -64.7298299996841, -64.727025000328, -64.7081970000824, -64.7016040001075, -64.6832930000708, -64.6752489999265, -64.6684810000495, -64.6404370000478, -64.6391440000621, -64.643689000191, -64.6377349997255, -64.6638549997298, -64.6613010004438, -64.6642589999609, -64.6782290000832, -64.6944889999008, -64.7079069996541, -64.720173999712, -64.7487359997476, -64.7577690001039, -64.7734199996511, -64.7828909996753, -64.8012889997506, -64.7992609999961, -64.8027730004303, -64.8068909998636, -64.8185169998639, -64.8333530001747, -64.849551999893, -64.8580350004284, -64.8725339995569, -64.8897519996245, -64.8997750000177, -64.9052399998634, -64.9061510002576, -64.921034000424, -64.9337140000354, -64.9420610001283, -64.9464720003629, -64.9609639998188, -64.9804139998553, -64.9978610002517, -65.0198210000917, -65.0315650000928, -65.0485809995957, -65.0501940001468, -65.0577479998463, -65.0677319998815, -65.0809529999913, -65.10083099965, -65.1172670000931, -65.1312199995987, -65.1426220001913, -65.1477229999889, -65.154117999597, -65.1536439999441, -65.1506989999476, -65.1448089999545, -65.137688999725, -65.1399409997855, -65.1351379999137, -65.1303469997376, -65.1174269999267, -65.1130569997, -65.1137599996818, -65.112214999801, -65.1004130000736, -65.091790999621, -65.0805670002019, -65.0606650002537, -65.0415220003638, -65.0172989996944, -65.007769000119, -64.9995900002555, -64.9823889999064, -64.9691820000403, -64.950455000077, -64.9426920001401, -64.9276419995691, -64.907594999856, -64.8950240000245, -64.8910329999146, -64.8739119999314, -64.8649759996594, -64.8599500002049, -64.843729999672], [18.3937130003038, 18.3952020001978, 18.4029059996947, 18.4075949997621, 18.4038019999373, 18.386209000173, 18.3766859999062, 18.3781310002826, 18.3762170003899, 18.3779449996133, 18.3747250001871, 
18.373742000052, 18.3706359999168, 18.3725669997141, 18.3674629998913, 18.3655769999245, 18.363997000316, 18.3551899997673, 18.3441069997742, 18.3197589996555, 18.2853760001861, 18.2673589997501, 18.2612709996959, 18.2523160002306, 18.2487579998992, 18.2487249997681, 18.2506900002989, 18.2630590000803, 18.2608550002774, 18.2605059997401, 18.2526930002856, 18.2451059996837, 18.2293219998289, 18.2133720003843, 18.2051629999707, 18.1920609998845, 18.1834559998114, 18.1796829999945, 18.1795870003758, 18.1834839999737, 18.1942359996752, 18.2065249995874, 18.2185139997731, 18.2241929999785, 18.2288429997134, 18.2379699999478, 18.2480099999376, 18.2572230000658, 18.2560599998827, 18.2569120001884, 18.252492999917, 18.2523549996564, 18.2562770001029, 18.2667770001428, 18.2691289998076, 18.2541629999811, 18.2439080002976, 18.2364819999318, 18.2326290001953, 18.23477199986, 18.2417569995878, 18.2507890004071, 18.2590850003178, 18.2774250002881, 18.2931389997995, 18.302844999769, 18.3128859999509, 18.3205580003919, 18.3362909997264, 18.3508549999627, 18.3615369997632, 18.3802330002905, 18.3957579996876, 18.4068750001337, 18.418827, 18.4395440001859, 18.4471019997825, 18.4528070003069, 18.4597469996518, 18.4591020001661, 18.4493229999302, 18.4407029996101, 18.4455259999609, 18.4507079999187, 18.4509469999009, 18.4462940001965, 18.4541570002839, 18.4619490000909, 18.4649839999656, 18.4633640003554, 18.4621600003593, 18.4062549996746, 18.3959270002777, 18.3942250002094, 18.3937130003038]]
```python
# Draw the state capitals as red dots on top of the state outlines in blue.
fig, axis = plt.subplots()
x_col = "longitude"
y_col = "latitude"
x = df[x_col].to_numpy()
y = df[y_col].to_numpy()
# View window: the capitals' bounding box padded by 5 on every side.
xlo, xhi = df[x_col].min() - 5, df[x_col].max() + 5
ylo, yhi = df[y_col].min() - 5, df[y_col].max() + 5
axis.axis([xlo, xhi, ylo, yhi])
axis.plot(x, y, "ro")
for outline_x, outline_y in sb3:
    axis.plot(outline_x, outline_y, "b")
axis.set_title("USA states and their capital location")
plt.show()
```
![png](output_7_0.png)
## Edit CSV file URL variable
```python
# Alternative to the interactive prompt: set the CSV location directly in code,
# then load it into `df`.
url = "https://data.gov.au/data/dataset/7169894e-b518-4933-a900-f79ebc4ec6a3/resource/5d9edabe-b6af-4975-9340-88f55b872a00/download/soe2016antarctic5-year-smoothed-annual-max-min-temperature-by-stationaad.csv"
df = pd.read_csv(url)
df
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Year</th>
<th>Macquarie Island MAX</th>
<th>Macquarie Island MIN</th>
<th>Mawson MAX</th>
<th>Mawson MIN</th>
<th>Casey MAX</th>
<th>Casey MIN</th>
<th>Davis MAX</th>
<th>Davis MIN</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>1948</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>1</th>
<td>1949</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>2</th>
<td>1950</td>
<td>6.050</td>
<td>2.625</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>3</th>
<td>1951</td>
<td>6.100</td>
<td>2.660</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>4</th>
<td>1952</td>
<td>6.140</td>
<td>2.660</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>63</th>
<td>2011</td>
<td>6.740</td>
<td>3.200</td>
<td>-8.04</td>
<td>-14.28</td>
<td>-5.86</td>
<td>-12.50</td>
<td>-7.00</td>
<td>-12.98</td>
</tr>
<tr>
<th>64</th>
<td>2012</td>
<td>6.840</td>
<td>3.260</td>
<td>-8.24</td>
<td>-14.40</td>
<td>-6.28</td>
<td>-12.96</td>
<td>-7.10</td>
<td>-13.08</td>
</tr>
<tr>
<th>65</th>
<td>2013</td>
<td>6.825</td>
<td>3.225</td>
<td>-8.10</td>
<td>-14.30</td>
<td>-6.30</td>
<td>-12.90</td>
<td>-7.05</td>
<td>-13.05</td>
</tr>
<tr>
<th>66</th>
<td>2014</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>67</th>
<td>2015</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
</tbody>
</table>
<p>68 rows × 9 columns</p>
</div>
```python
# Preview the first two rows of the loaded table.
df.head(2)
```
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>Year</th>
<th>Macquarie Island MAX</th>
<th>Macquarie Island MIN</th>
<th>Mawson MAX</th>
<th>Mawson MIN</th>
<th>Casey MAX</th>
<th>Casey MIN</th>
<th>Davis MAX</th>
<th>Davis MIN</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>1948</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
<tr>
<th>1</th>
<td>1949</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
</tr>
</tbody>
</table>
</div>
```python
# Column labels as an array, so that columns can be referred to by position.
col_names = df.columns.values
print(col_names)
```
['Year' 'Macquarie Island MAX' 'Macquarie Island MIN' 'Mawson MAX'
'Mawson MIN' 'Casey MAX' 'Casey MIN' 'Davis MAX' 'Davis MIN']
```python
# List the available columns with their positions, then let the user choose
# which positions to plot.
for i, name in enumerate(col_names):
    print(i, name)
x_col = int(input("Input index number of column to be used for x-axis: "))
y_col = int(input("Input index number of column to be used for y-axis: "))
# The second y column is optional: an empty or whitespace-only answer means
# "none" and is stored as None.
y_col2 = input("Input index number of column 2 (optional) to be used for y-axis: ").strip()
y_col2 = int(y_col2) if y_col2 else None
print(f"x-axis: {col_names[x_col]}, y-axis: {col_names[y_col]}, y-axis (second): {y_col2}")
```
0 Year
1 Macquarie Island MAX
2 Macquarie Island MIN
3 Mawson MAX
4 Mawson MIN
5 Casey MAX
6 Casey MIN
7 Davis MAX
8 Davis MIN
Input index number of column to be used for x-axis: 0
Input index number of column to be used for y-axis: 2
Input index number of column 2 (optional) to be used for y-axis: 1
x-axis: Year, y-axis: Macquarie Island MIN, y-axis (second): 1
```python
# Line plot of the chosen y column (blue) and the optional second y column
# (red) against the chosen x column.
fig, axis = plt.subplots()
# Compare with None instead of relying on truthiness: column index 0 is a
# valid choice and must not be treated as "no second column".
has_y2 = y_col2 is not None

# View window: the data range padded by 5 on every side.
xlo = df[col_names[x_col]].min() - 5
xhi = df[col_names[x_col]].max() + 5
ylo = df[col_names[y_col]].min() - 5
yhi = df[col_names[y_col]].max() + 5
if has_y2:
    # Widen the y range so the second series fits as well; both bounds must
    # be taken from the second column.
    ylo = min(ylo, df[col_names[y_col2]].min() - 5)
    yhi = max(yhi, df[col_names[y_col2]].max() + 5)

x = df[col_names[x_col]].to_numpy()
y = df[col_names[y_col]].to_numpy()
plt.axis([xlo, xhi, ylo, yhi])
# Connected lines; switch the format strings to "bo" / "ro" for point markers.
plt.plot(x, y, "b")
if has_y2:
    y2 = df[col_names[y_col2]].to_numpy()
    plt.plot(x, y2, "r")
plt.show()
```
![png](output_13_0.png)
```python
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Loading…
Cancel
Save