Browse Source

Pandas and Plotly

pull/57/head
Jure Šorn 4 years ago
parent
commit
0db0c8a541
3 changed files with 54 additions and 52 deletions
  1. 50
      README.md
  2. 52
      index.html
  3. 4
      parse.js

50
README.md

@@ -3123,14 +3123,13 @@ Name: a, dtype: int64
>>> sr = Series([1, 2], index=['x', 'y'])
x 1
y 2
dtype: int64
```
```text
+-------------+-------------+-------------+---------------+
| | 'sum' | ['sum'] | {'s': 'sum'} |
+-------------+-------------+-------------+---------------+
| sr.apply(…) | 3 | sum 3 | s 3 |
| sr.apply(…) | 3 | sum 3 | s 3 |
| sr.agg(…) | | | |
+-------------+-------------+-------------+---------------+
```
@@ -3144,6 +3143,7 @@ dtype: int64
| sr.trans(…) | y 2 | y 2 | y 2 |
+-------------+-------------+-------------+---------------+
```
* **Last result has a hierarchical index. `'<Sr>[<key_1>, <key_2>]'` returns the value.**
### DataFrame
**Table with labeled rows and columns.**
@@ -3358,12 +3358,12 @@ covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
continents = pd.read_csv('https://datahub.io/JohnSnowLabs/country-and-continent-codes-' + \
'list/r/country-and-continent-codes-list-csv.csv',
usecols=['Three_Letter_Country_Code', 'Continent_Name'])
merged = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
summed = merged.groupby(['Continent_Name', 'date']).sum().reset_index()
summed['Total Deaths per Million'] = summed.total_deaths * 1e6 / summed.population
summed = summed[('2020-03-14' < summed.date) & (summed.date < '2020-06-25')]
summed = summed.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
plotly.express.line(summed, x='Date', y='Total Deaths per Million', color='Continent').show()
df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
df = df.groupby(['Continent_Name', 'date']).sum().reset_index()
df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population
df = df[('2020-03-14' < df.date) & (df.date < '2020-06-25')]
df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
plotly.express.line(df, x='Date', y='Total Deaths per Million', color='Continent').show()
```
### Confirmed Covid Cases, Dow Jones, Gold, and Bitcoin Price
@@ -3379,7 +3379,7 @@ def main():
display_data(wrangle_data(*scrape_data()))
def scrape_data():
def scrape_yah(id_):
def scrape_yahoo(id_):
BASE_URL = 'https://query1.finance.yahoo.com/v7/finance/download/'
now = int(datetime.datetime.now().timestamp())
url = f'{BASE_URL}{id_}?period1=1579651200&period2={now}&interval=1d&events=history'
@@ -3387,23 +3387,23 @@ def scrape_data():
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
usecols=['date', 'total_cases'])
covid = covid.groupby('date').sum()
dow_jones, gold, bitcoin = scrape_yah('^DJI'), scrape_yah('GC=F'), scrape_yah('BTC-USD')
dow_jones.name, gold.name, bitcoin.name = 'Dow Jones', 'Gold', 'Bitcoin'
return covid, dow_jones, gold, bitcoin
def wrangle_data(covid, dow_jones, gold, bitcoin):
out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=1)
out = out.loc['2020-02-23':].iloc[:-2]
out = out.interpolate()
out.iloc[:, 1:] = out.rolling(10, min_periods=1, center=True).mean().iloc[:, 1:]
out.iloc[:, 1:] = out.iloc[:, 1:] / out.iloc[0, 1:] * 100
return out
def display_data(out):
dow, gold, bitcoin = [scrape_yahoo(id_) for id_ in ('^DJI', 'GC=F', 'BTC-USD')]
dow.name, gold.name, bitcoin.name = 'Dow Jones', 'Gold', 'Bitcoin'
return covid, dow, gold, bitcoin
def wrangle_data(covid, dow, gold, bitcoin):
df = pandas.concat([covid, dow, gold, bitcoin], axis=1)
df = df.loc['2020-02-23':].iloc[:-2]
df = df.interpolate()
df.iloc[:, 1:] = df.rolling(10, min_periods=1, center=True).mean().iloc[:, 1:]
df.iloc[:, 1:] = df.iloc[:, 1:] / df.iloc[0, 1:] * 100
return df
def display_data(df):
def get_trace(col_name):
return go.Scatter(x=out.index, y=out[col_name], name=col_name, yaxis='y2')
traces = [get_trace(col_name) for col_name in out.columns[1:]]
traces.append(go.Scatter(x=out.index, y=out.total_cases, name='Total Cases', yaxis='y1'))
return go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis='y2')
traces = [get_trace(col_name) for col_name in df.columns[1:]]
traces.append(go.Scatter(x=df.index, y=df.total_cases, name='Total Cases', yaxis='y1'))
figure = go.Figure()
figure.add_traces(traces)
figure.update_layout(

52
index.html

@@ -2660,12 +2660,11 @@ Name: a, dtype: int64
<pre><code class="python language-python hljs"><span class="hljs-meta">&gt;&gt;&gt; </span>sr = Series([<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], index=[<span class="hljs-string">'x'</span>, <span class="hljs-string">'y'</span>])
x <span class="hljs-number">1</span>
y <span class="hljs-number">2</span>
dtype: int64
</code></pre>
<pre><code class="python hljs">┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ <span class="hljs-string">'sum'</span> │ [<span class="hljs-string">'sum'</span>] │ {<span class="hljs-string">'s'</span>: <span class="hljs-string">'sum'</span>} ┃
┠─────────────┼─────────────┼─────────────┼───────────────┨
┃ sr.apply(…) │ <span class="hljs-number">3</span> │ sum <span class="hljs-number">3</span> s <span class="hljs-number">3</span>
┃ sr.apply(…) │ <span class="hljs-number">3</span> │ sum <span class="hljs-number">3</span> │ s <span class="hljs-number">3</span>
┃ sr.agg(…) │ │ │ ┃
┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
</code></pre>
@@ -2677,6 +2676,9 @@ dtype: int64
┃ sr.trans(…) │ y <span class="hljs-number">2</span> │ y <span class="hljs-number">2</span> │ y <span class="hljs-number">2</span>
┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
</code></pre>
<ul>
<li><strong>Last result has a hierarchical index. <code class="python hljs"><span class="hljs-string">'&lt;Sr&gt;[&lt;key_1&gt;, &lt;key_2&gt;]'</span></code> returns the value.</strong></li>
</ul>
<div><h3 id="dataframe">DataFrame</h3><p><strong>Table with labeled rows and columns.</strong></p><pre><code class="python language-python hljs"><span class="hljs-meta">&gt;&gt;&gt; </span>DataFrame([[<span class="hljs-number">1</span>, <span class="hljs-number">2</span>], [<span class="hljs-number">3</span>, <span class="hljs-number">4</span>]], index=[<span class="hljs-string">'a'</span>, <span class="hljs-string">'b'</span>], columns=[<span class="hljs-string">'x'</span>, <span class="hljs-string">'y'</span>])
x y
a <span class="hljs-number">1</span> <span class="hljs-number">2</span>
@@ -2839,12 +2841,12 @@ covid = pd.read_csv(<span class="hljs-string">'https://covid.ourworldindata.org/
continents = pd.read_csv(<span class="hljs-string">'https://datahub.io/JohnSnowLabs/country-and-continent-codes-'</span> + \
<span class="hljs-string">'list/r/country-and-continent-codes-list-csv.csv'</span>,
usecols=[<span class="hljs-string">'Three_Letter_Country_Code'</span>, <span class="hljs-string">'Continent_Name'</span>])
merged = pd.merge(covid, continents, left_on=<span class="hljs-string">'iso_code'</span>, right_on=<span class="hljs-string">'Three_Letter_Country_Code'</span>)
summed = merged.groupby([<span class="hljs-string">'Continent_Name'</span>, <span class="hljs-string">'date'</span>]).sum().reset_index()
summed[<span class="hljs-string">'Total Deaths per Million'</span>] = summed.total_deaths * <span class="hljs-number">1e6</span> / summed.population
summed = summed[(<span class="hljs-string">'2020-03-14'</span> &lt; summed.date) &amp; (summed.date &lt; <span class="hljs-string">'2020-06-25'</span>)]
summed = summed.rename({<span class="hljs-string">'date'</span>: <span class="hljs-string">'Date'</span>, <span class="hljs-string">'Continent_Name'</span>: <span class="hljs-string">'Continent'</span>}, axis=<span class="hljs-string">'columns'</span>)
plotly.express.line(summed, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">'Total Deaths per Million'</span>, color=<span class="hljs-string">'Continent'</span>).show()
df = pd.merge(covid, continents, left_on=<span class="hljs-string">'iso_code'</span>, right_on=<span class="hljs-string">'Three_Letter_Country_Code'</span>)
df = df.groupby([<span class="hljs-string">'Continent_Name'</span>, <span class="hljs-string">'date'</span>]).sum().reset_index()
df[<span class="hljs-string">'Total Deaths per Million'</span>] = df.total_deaths * <span class="hljs-number">1e6</span> / df.population
df = df[(<span class="hljs-string">'2020-03-14'</span> &lt; df.date) &amp; (df.date &lt; <span class="hljs-string">'2020-06-25'</span>)]
df = df.rename({<span class="hljs-string">'date'</span>: <span class="hljs-string">'Date'</span>, <span class="hljs-string">'Continent_Name'</span>: <span class="hljs-string">'Continent'</span>}, axis=<span class="hljs-string">'columns'</span>)
plotly.express.line(df, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">'Total Deaths per Million'</span>, color=<span class="hljs-string">'Continent'</span>).show()
</code></pre></div></div>
@@ -2857,7 +2859,7 @@ plotly.express.line(summed, x=<span class="hljs-string">'Date'</span>, y=<span c
display_data(wrangle_data(*scrape_data()))
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_data</span><span class="hljs-params">()</span>:</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_yah</span><span class="hljs-params">(id_)</span>:</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_yahoo</span><span class="hljs-params">(id_)</span>:</span>
BASE_URL = <span class="hljs-string">'https://query1.finance.yahoo.com/v7/finance/download/'</span>
now = int(datetime.datetime.now().timestamp())
url = <span class="hljs-string">f'<span class="hljs-subst">{BASE_URL}</span><span class="hljs-subst">{id_}</span>?period1=1579651200&amp;period2=<span class="hljs-subst">{now}</span>&amp;interval=1d&amp;events=history'</span>
@@ -2865,23 +2867,23 @@ plotly.express.line(summed, x=<span class="hljs-string">'Date'</span>, y=<span c
covid = pd.read_csv(<span class="hljs-string">'https://covid.ourworldindata.org/data/owid-covid-data.csv'</span>,
usecols=[<span class="hljs-string">'date'</span>, <span class="hljs-string">'total_cases'</span>])
covid = covid.groupby(<span class="hljs-string">'date'</span>).sum()
dow_jones, gold, bitcoin = scrape_yah(<span class="hljs-string">'^DJI'</span>), scrape_yah(<span class="hljs-string">'GC=F'</span>), scrape_yah(<span class="hljs-string">'BTC-USD'</span>)
dow_jones.name, gold.name, bitcoin.name = <span class="hljs-string">'Dow Jones'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Bitcoin'</span>
<span class="hljs-keyword">return</span> covid, dow_jones, gold, bitcoin
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">wrangle_data</span><span class="hljs-params">(covid, dow_jones, gold, bitcoin)</span>:</span>
out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=<span class="hljs-number">1</span>)
out = out.loc[<span class="hljs-string">'2020-02-23'</span>:].iloc[:<span class="hljs-number">-2</span>]
out = out.interpolate()
out.iloc[:, <span class="hljs-number">1</span>:] = out.rolling(<span class="hljs-number">10</span>, min_periods=<span class="hljs-number">1</span>, center=<span class="hljs-keyword">True</span>).mean().iloc[:, <span class="hljs-number">1</span>:]
out.iloc[:, <span class="hljs-number">1</span>:] = out.iloc[:, <span class="hljs-number">1</span>:] / out.iloc[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>:] * <span class="hljs-number">100</span>
<span class="hljs-keyword">return</span> out
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">display_data</span><span class="hljs-params">(out)</span>:</span>
dow, gold, bitcoin = [scrape_yahoo(id_) <span class="hljs-keyword">for</span> id_ <span class="hljs-keyword">in</span> (<span class="hljs-string">'^DJI'</span>, <span class="hljs-string">'GC=F'</span>, <span class="hljs-string">'BTC-USD'</span>)]
dow.name, gold.name, bitcoin.name = <span class="hljs-string">'Dow Jones'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Bitcoin'</span>
<span class="hljs-keyword">return</span> covid, dow, gold, bitcoin
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">wrangle_data</span><span class="hljs-params">(covid, dow, gold, bitcoin)</span>:</span>
df = pandas.concat([covid, dow, gold, bitcoin], axis=<span class="hljs-number">1</span>)
df = df.loc[<span class="hljs-string">'2020-02-23'</span>:].iloc[:<span class="hljs-number">-2</span>]
df = df.interpolate()
df.iloc[:, <span class="hljs-number">1</span>:] = df.rolling(<span class="hljs-number">10</span>, min_periods=<span class="hljs-number">1</span>, center=<span class="hljs-keyword">True</span>).mean().iloc[:, <span class="hljs-number">1</span>:]
df.iloc[:, <span class="hljs-number">1</span>:] = df.iloc[:, <span class="hljs-number">1</span>:] / df.iloc[<span class="hljs-number">0</span>, <span class="hljs-number">1</span>:] * <span class="hljs-number">100</span>
<span class="hljs-keyword">return</span> df
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">display_data</span><span class="hljs-params">(df)</span>:</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">get_trace</span><span class="hljs-params">(col_name)</span>:</span>
<span class="hljs-keyword">return</span> go.Scatter(x=out.index, y=out[col_name], name=col_name, yaxis=<span class="hljs-string">'y2'</span>)
traces = [get_trace(col_name) <span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> out.columns[<span class="hljs-number">1</span>:]]
traces.append(go.Scatter(x=out.index, y=out.total_cases, name=<span class="hljs-string">'Total Cases'</span>, yaxis=<span class="hljs-string">'y1'</span>))
<span class="hljs-keyword">return</span> go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=<span class="hljs-string">'y2'</span>)
traces = [get_trace(col_name) <span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> df.columns[<span class="hljs-number">1</span>:]]
traces.append(go.Scatter(x=df.index, y=df.total_cases, name=<span class="hljs-string">'Total Cases'</span>, yaxis=<span class="hljs-string">'y1'</span>))
figure = go.Figure()
figure.add_traces(traces)
figure.update_layout(

4
parse.js

@@ -275,13 +275,13 @@ const DIAGRAM_12_B =
'┗━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━━━┛\n';
const DIAGRAM_13_A =
'| sr.apply(…) | 3 | sum 3 | s 3 |';
'| sr.apply(…) | 3 | sum 3 | s 3 |';
const DIAGRAM_13_B =
"┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" +
"┃ │ 'sum' │ ['sum'] │ {'s': 'sum'} ┃\n" +
"┠─────────────┼─────────────┼─────────────┼───────────────┨\n" +
"┃ sr.apply(…) │ 3 │ sum 3 │ s 3 ┃\n" +
"┃ sr.apply(…) │ 3 │ sum 3 │ s 3 ┃\n" +
"┃ sr.agg(…) │ │ │ ┃\n" +
"┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n";

Loading…
Cancel
Save