Browse Source

Datetime, Plotly, update_plots.py and updated plots

pull/116/head
Jure Šorn 2 years ago
parent
commit
472e308877
7 changed files with 10893 additions and 2778 deletions
  1. 27
      README.md
  2. 31
      index.html
  3. 5422
      web/covid_cases.js
  4. BIN
      web/covid_cases.png
  5. 8110
      web/covid_deaths.js
  6. BIN
      web/covid_deaths.png
  7. 81
      web/update_plots.py

27
README.md

@@ -598,12 +598,11 @@ from dateutil.tz import UTC, tzlocal, gettz, datetime_exists, resolve_imaginary
<D> = date(year, month, day)
<T> = time(hour=0, minute=0, second=0, microsecond=0, tzinfo=None, fold=0)
<DT> = datetime(year, month, day, hour=0, minute=0, second=0, ...)
<TD> = timedelta(days=0, seconds=0, microseconds=0, milliseconds=0,
minutes=0, hours=0, weeks=0)
<TD> = timedelta(weeks=0, days=0, hours=0, minutes=0, seconds=0, ...)
```
* **Use `'<D/DT>.weekday()'` to get the day of the week (Mon == 0).**
* **`'fold=1'` means the second pass in case of time jumping back for one hour.**
* **`'<DTa> = resolve_imaginary(<DTa>)'` fixes DTs that fall into the missing hour.**
* **TD converts and normalizes args to ±days, seconds (< 86,400) and microseconds (< 1M).**
### Now
```python
@@ -3390,7 +3389,7 @@ continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a
df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
df = df.groupby(['Continent_Name', 'date']).sum().reset_index()
df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population
df = df[('2020-03-14' < df.date) & (df.date < '2020-11-25')]
df = df[df.date > '2020-03-14']
df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
line(df, x='Date', y='Total Deaths per Million', color='Continent').show()
```
@@ -3414,23 +3413,23 @@ def scrape_data():
return df[df.location == 'World'].set_index('date').total_cases
def scrape_yahoo(slug):
url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \
'?period1=1579651200&period2=1608850800&interval=1d&events=history'
'?period1=1579651200&period2=9999999999&interval=1d&events=history'
df = pd.read_csv(url, usecols=['Date', 'Close'])
return df.set_index('Date').Close
return scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'), scrape_yahoo('^DJI')
out = scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'), scrape_yahoo('^DJI')
return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'])
def wrangle_data(covid, bitcoin, gold, dow):
df = pd.concat([bitcoin, gold, dow], axis=1)
df = df.sort_index().interpolate()
df = df.rolling(10, min_periods=1, center=True).mean()
df = df.loc['2020-02-23':'2020-11-25']
df = (df / df.iloc[0]) * 100
return pd.concat([covid, df], axis=1, join='inner')
df = pd.concat([bitcoin, gold, dow], axis=1) # Joins columns on dates.
df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s.
df = df.loc['2020-02-23':] # Discards rows before '2020-02-23'.
df = (df / df.iloc[0]) * 100 # Calculates percentages relative to day 1.
df = df.join(covid) # Adds column with covid cases.
return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value.
def display_data(df):
df.columns = ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']
figure = go.Figure()
for col_name in df:
for col_name in reversed(df.columns):
yaxis = 'y1' if col_name == 'Total Cases' else 'y2'
trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis)
figure.add_trace(trace)

31
index.html

@@ -50,7 +50,7 @@
<body>
<header>
<aside>December 17, 2021</aside>
<aside>December 21, 2021</aside>
<a href="https://gto76.github.io" rel="author">Jure Šorn</a>
</header>
@@ -524,14 +524,13 @@ to_exclusive = &lt;range&gt;.stop
<div><h3 id="constructors">Constructors</h3><pre><code class="python language-python apache hljs">&lt;D&gt; = date(year, month, day)
&lt;T&gt; = time(hour=<span class="hljs-number">0</span>, minute=<span class="hljs-number">0</span>, second=<span class="hljs-number">0</span>, microsecond=<span class="hljs-number">0</span>, tzinfo=<span class="hljs-keyword">None</span>, fold=<span class="hljs-number">0</span>)
&lt;DT&gt; = datetime(year, month, day, hour=<span class="hljs-number">0</span>, minute=<span class="hljs-number">0</span>, second=<span class="hljs-number">0</span>, ...)
&lt;TD&gt; = timedelta(days=<span class="hljs-number">0</span>, seconds=<span class="hljs-number">0</span>, microseconds=<span class="hljs-number">0</span>, milliseconds=<span class="hljs-number">0</span>,
minutes=<span class="hljs-number">0</span>, hours=<span class="hljs-number">0</span>, weeks=<span class="hljs-number">0</span>)
&lt;TD&gt; = timedelta(weeks=<span class="hljs-number">0</span>, days=<span class="hljs-number">0</span>, hours=<span class="hljs-number">0</span>, minutes=<span class="hljs-number">0</span>, seconds=<span class="hljs-number">0</span>, ...)
</code></pre></div>
<ul>
<li><strong>Use <code class="python hljs"><span class="hljs-string">'&lt;D/DT&gt;.weekday()'</span></code> to get the day of the week (Mon == 0).</strong></li>
<li><strong><code class="python hljs"><span class="hljs-string">'fold=1'</span></code> means the second pass in case of time jumping back for one hour.</strong></li>
<li><strong><code class="python hljs"><span class="hljs-string">'&lt;DTa&gt; = resolve_imaginary(&lt;DTa&gt;)'</span></code> fixes DTs that fall into the missing hour.</strong></li>
<li><strong>TD converts and normalizes args to ±days, seconds (&lt; 86,400) and microseconds (&lt; 1M).</strong></li>
</ul>
<div><h3 id="now">Now</h3><pre><code class="python language-python hljs">&lt;D/DTn&gt; = D/DT.today() <span class="hljs-comment"># Current local date or naive datetime.</span>
&lt;DTn&gt; = DT.utcnow() <span class="hljs-comment"># Naive datetime from current UTC time.</span>
@@ -2740,7 +2739,7 @@ continents = pd.read_csv(<span class="hljs-string">'https://gist.githubuserconte
df = pd.merge(covid, continents, left_on=<span class="hljs-string">'iso_code'</span>, right_on=<span class="hljs-string">'Three_Letter_Country_Code'</span>)
df = df.groupby([<span class="hljs-string">'Continent_Name'</span>, <span class="hljs-string">'date'</span>]).sum().reset_index()
df[<span class="hljs-string">'Total Deaths per Million'</span>] = df.total_deaths * <span class="hljs-number">1e6</span> / df.population
df = df[(<span class="hljs-string">'2020-03-14'</span> &lt; df.date) &amp; (df.date &lt; <span class="hljs-string">'2020-11-25'</span>)]
df = df[df.date &gt; <span class="hljs-string">'2020-03-14'</span>]
df = df.rename({<span class="hljs-string">'date'</span>: <span class="hljs-string">'Date'</span>, <span class="hljs-string">'Continent_Name'</span>: <span class="hljs-string">'Continent'</span>}, axis=<span class="hljs-string">'columns'</span>)
line(df, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">'Total Deaths per Million'</span>, color=<span class="hljs-string">'Continent'</span>).show()
</code></pre></div>
@@ -2760,23 +2759,23 @@ line(df, x=<span class="hljs-string">'Date'</span>, y=<span class="hljs-string">
<span class="hljs-keyword">return</span> df[df.location == <span class="hljs-string">'World'</span>].set_index(<span class="hljs-string">'date'</span>).total_cases
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">scrape_yahoo</span><span class="hljs-params">(slug)</span>:</span>
url = <span class="hljs-string">f'https://query1.finance.yahoo.com/v7/finance/download/<span class="hljs-subst">{slug}</span>'</span> + \
<span class="hljs-string">'?period1=1579651200&amp;period2=1608850800&amp;interval=1d&amp;events=history'</span>
<span class="hljs-string">'?period1=1579651200&amp;period2=9999999999&amp;interval=1d&amp;events=history'</span>
df = pd.read_csv(url, usecols=[<span class="hljs-string">'Date'</span>, <span class="hljs-string">'Close'</span>])
<span class="hljs-keyword">return</span> df.set_index(<span class="hljs-string">'Date'</span>).Close
<span class="hljs-keyword">return</span> scrape_covid(), scrape_yahoo(<span class="hljs-string">'BTC-USD'</span>), scrape_yahoo(<span class="hljs-string">'GC=F'</span>), scrape_yahoo(<span class="hljs-string">'^DJI'</span>)
out = scrape_covid(), scrape_yahoo(<span class="hljs-string">'BTC-USD'</span>), scrape_yahoo(<span class="hljs-string">'GC=F'</span>), scrape_yahoo(<span class="hljs-string">'^DJI'</span>)
<span class="hljs-keyword">return</span> map(pd.Series.rename, out, [<span class="hljs-string">'Total Cases'</span>, <span class="hljs-string">'Bitcoin'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Dow Jones'</span>])
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">wrangle_data</span><span class="hljs-params">(covid, bitcoin, gold, dow)</span>:</span>
df = pd.concat([bitcoin, gold, dow], axis=<span class="hljs-number">1</span>)
df = df.sort_index().interpolate()
df = df.rolling(<span class="hljs-number">10</span>, min_periods=<span class="hljs-number">1</span>, center=<span class="hljs-keyword">True</span>).mean()
df = df.loc[<span class="hljs-string">'2020-02-23'</span>:<span class="hljs-string">'2020-11-25'</span>]
df = (df / df.iloc[<span class="hljs-number">0</span>]) * <span class="hljs-number">100</span>
<span class="hljs-keyword">return</span> pd.concat([covid, df], axis=<span class="hljs-number">1</span>, join=<span class="hljs-string">'inner'</span>)
df = pd.concat([bitcoin, gold, dow], axis=<span class="hljs-number">1</span>) <span class="hljs-comment"># Joins columns on dates.</span>
df = df.sort_index().interpolate() <span class="hljs-comment"># Sorts by date and interpolates NaN-s.</span>
df = df.loc[<span class="hljs-string">'2020-02-23'</span>:] <span class="hljs-comment"># Discards rows before '2020-02-23'.</span>
df = (df / df.iloc[<span class="hljs-number">0</span>]) * <span class="hljs-number">100</span> <span class="hljs-comment"># Calculates percentages relative to day 1.</span>
df = df.join(covid) <span class="hljs-comment"># Adds column with covid cases.</span>
<span class="hljs-keyword">return</span> df.sort_values(df.index[<span class="hljs-number">-1</span>], axis=<span class="hljs-number">1</span>) <span class="hljs-comment"># Sorts columns by last day's value.</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">display_data</span><span class="hljs-params">(df)</span>:</span>
df.columns = [<span class="hljs-string">'Total Cases'</span>, <span class="hljs-string">'Bitcoin'</span>, <span class="hljs-string">'Gold'</span>, <span class="hljs-string">'Dow Jones'</span>]
figure = go.Figure()
<span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> df:
<span class="hljs-keyword">for</span> col_name <span class="hljs-keyword">in</span> reversed(df.columns):
yaxis = <span class="hljs-string">'y1'</span> <span class="hljs-keyword">if</span> col_name == <span class="hljs-string">'Total Cases'</span> <span class="hljs-keyword">else</span> <span class="hljs-string">'y2'</span>
trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis)
figure.add_trace(trace)
@@ -2870,7 +2869,7 @@ $ pyinstaller script.py --add-data '&lt;path&gt;:.' <span class="hljs-comment">
<footer>
<aside>December 17, 2021</aside>
<aside>December 21, 2021</aside>
<a href="https://gto76.github.io" rel="author">Jure Šorn</a>
</footer>

5422
web/covid_cases.js
File diff suppressed because it is too large
View File

BIN
web/covid_cases.png

Before After
Width: 960  |  Height: 325  |  Size: 57 KiB Width: 960  |  Height: 315  |  Size: 60 KiB

8110
web/covid_deaths.js
File diff suppressed because it is too large
View File

BIN
web/covid_deaths.png

Before After
Width: 960  |  Height: 349  |  Size: 48 KiB Width: 960  |  Height: 340  |  Size: 50 KiB

81
web/update_plots.py

@@ -4,7 +4,7 @@
# Updates plots from the Plotly section so they show the latest data.
from pathlib import Path
from datetime import date, time, datetime, timedelta
import datetime
import pandas as pd
from plotly.express import line
import plotly.graph_objects as go
@@ -19,26 +19,23 @@ def main():
def update_covid_deaths():
def update_readme(date_treshold):
lines = read_file(Path('..') / 'README.md')
out = [re.sub("df.date < '\d{4}-\d{2}-\d{2}'", f"df.date < '{date_treshold}'", line)
for line in lines]
write_to_file(Path('..') / 'README.md', out)
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
usecols=['iso_code', 'date', 'total_deaths', 'population'])
continents = pd.read_csv('https://datahub.io/JohnSnowLabs/country-and-continent-codes-' + \
'list/r/country-and-continent-codes-list-csv.csv',
continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff'
'846ea5d35e5fc47f26c/raw/country-and-continent-codes-list-csv.csv',
usecols=['Three_Letter_Country_Code', 'Continent_Name'])
df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
df = df.groupby(['Continent_Name', 'date']).sum().reset_index()
df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population
date_treshold = str(date.today() - timedelta(days=2)) # '2020-11-25'
df = df[('2020-03-14' < df.date) & (df.date < date_treshold)]
df['Total Deaths per Million'] = round(df.total_deaths * 1e6 / df.population)
today = str(datetime.date.today())
df = df[('2020-02-22' < df.date) & (df.date < today)]
df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
gb = df.groupby('Continent')
df['Max Total Deaths'] = gb[['Total Deaths per Million']].transform('max')
df = df.sort_values(['Max Total Deaths', 'Date'], ascending=[False, True])
f = line(df, x='Date', y='Total Deaths per Million', color='Continent')
f.update_layout(margin=dict(t=24, b=0), paper_bgcolor='rgba(0, 0, 0, 0)')
update_file('covid_deaths.js', f)
update_readme(date_treshold)
write_to_png_file('covid_deaths.png', f, width=960, height=340)
@@ -50,34 +47,36 @@ def update_confirmed_cases():
write_to_png_file('covid_cases.png', f, width=960, height=315)
def scrape_data():
def scrape_yahoo(id_):
BASE_URL = 'https://query1.finance.yahoo.com/v7/finance/download/'
now = int(datetime.now().timestamp())
url = f'{BASE_URL}{id_}?period1=1579651200&period2={now}&interval=1d&' + \
'events=history'
return pd.read_csv(url, usecols=['Date', 'Close']).set_index('Date').Close
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
usecols=['location', 'date', 'total_cases'])
covid = covid[covid.location == 'World'].set_index('date').total_cases
dow, gold, bitcoin = [scrape_yahoo(id_) for id_ in ('^DJI', 'GC=F', 'BTC-USD')]
dow.name, gold.name, bitcoin.name = 'Dow Jones', 'Gold', 'Bitcoin'
return covid, dow, gold, bitcoin
def wrangle_data(covid, dow, gold, bitcoin):
df = pd.concat([dow, gold, bitcoin], axis=1)
df = df.sort_index().interpolate()
df = df.rolling(10, min_periods=1, center=True).mean()
df = df.loc['2020-02-23':].iloc[:-2]
df = (df / df.iloc[0]) * 100
return pd.concat([covid, df], axis=1, join='inner')
def scrape_covid():
url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(url, usecols=['location', 'date', 'total_cases'])
return df[df.location == 'World'].set_index('date').total_cases
def scrape_yahoo(slug):
url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \
'?period1=1579651200&period2=9999999999&interval=1d&events=history'
df = pd.read_csv(url, usecols=['Date', 'Close'])
return df.set_index('Date').Close
out = scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'), scrape_yahoo('^DJI')
return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'])
def wrangle_data(covid, bitcoin, gold, dow):
df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates.
df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s.
yesterday = str(datetime.date.today() - datetime.timedelta(1))
df = df.loc['2020-02-23':yesterday] # Discards rows before '2020-02-23'.
df = round((df / df.iloc[0]) * 100, 2) # Calculates percentages relative to day 1.
df = df.join(covid) # Adds column with covid cases.
return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value.
def get_figure(df):
def get_trace(col_name):
return go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis='y2')
traces = [get_trace(col_name) for col_name in df.columns[1:]]
traces.append(go.Scatter(x=df.index, y=df.total_cases, name='Total Cases', yaxis='y1'))
figure = go.Figure()
figure.add_traces(traces)
for col_name in reversed(df.columns):
yaxis = 'y1' if col_name == 'Total Cases' else 'y2'
colors = {'Total Cases': 'red', 'Bitcoin': 'blue', 'Gold': 'goldenrod',
'Dow Jones': 'green'}
trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis,
line=dict(color=colors[col_name]))
figure.add_trace(trace)
figure.update_layout(
yaxis1=dict(title='Total Cases', rangemode='tozero'),
yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'),
@@ -90,6 +89,10 @@ def update_confirmed_cases():
main()
###
## UTIL
#
def update_file(filename, figure):
lines = read_file(filename)
f_json = figure.to_json(pretty=True).replace('\n', '\n ')
@@ -97,10 +100,6 @@ def update_file(filename, figure):
write_to_file(filename, out)
###
## UTIL
#
def read_file(filename):
p = Path(__file__).resolve().parent / filename
with open(p, encoding='utf-8') as file:

Loading…
Cancel
Save