|
|
@ -12,6 +12,11 @@ import re |
|
|
|
|
|
|
|
|
|
|
|
def main(): |
|
|
|
""" |
|
|
|
Prints progress messages and refreshes the COVID-19 plots. |



It first calls update_covid_deaths() to update the deaths plot |



and then announces the covid cases update. |
|
|
|
""" |
|
|
|
print('Updating covid deaths...') |
|
|
|
update_covid_deaths() |
|
|
|
print('Updating covid cases...') |
|
|
@ -19,6 +24,11 @@ def main(): |
|
|
|
|
|
|
|
|
|
|
|
def update_covid_deaths(): |
|
|
|
""" |
|
|
|
Update the plot of global COVID-19 deaths over time. |
|
|
|
|
|
|
|
Takes no parameters: it reads the 'iso_code', 'date', 'total_deaths' and 'population' columns of the Our World in Data CSV, plus a continents CSV, itself. |
|
|
|
""" |
|
|
|
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', |
|
|
|
usecols=['iso_code', 'date', 'total_deaths', 'population']) |
|
|
|
continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff' |
|
|
@ -41,7 +51,15 @@ def update_covid_deaths(): |
|
|
|
|
|
|
|
|
|
|
|
def update_confirmed_cases(): |
|
|
|
""" |
|
|
|
Update the file covid_cases.js with a plot of total cases, gold price, bitcoin price and Dow Jones index. |
|
|
|
""" |
|
|
|
def main(): |
|
|
|
""" |
|
|
|
This function scrapes the data from the web and wrangles it into a pandas DataFrame. |
|
|
|
It then builds a figure from that DataFrame with get_figure() and writes it |



to 'covid_cases.js' and 'covid_cases.png'. |
|
|
|
""" |
|
|
|
df = wrangle_data(*scrape_data()) |
|
|
|
f = get_figure(df) |
|
|
|
update_file('covid_cases.js', f) |
|
|
@ -49,11 +67,29 @@ def update_confirmed_cases(): |
|
|
|
write_to_png_file('covid_cases.png', f, width=960, height=315) |
|
|
|
|
|
|
|
def scrape_data(): |
|
|
|
""" |
|
|
|
This function scrapes data from the following sources: |
|
|
|
1. Our World in Data (Total Cases) |
|
|
|
2. Yahoo Finance (Bitcoin, Gold, Dow Jones) |
|
|
|
The function returns a `map` iterator (not a list) that yields the scraped pandas Series |



renamed to 'Total Cases', 'Bitcoin', 'Gold' and 'Dow Jones'. |
|
|
|
""" |
|
|
|
def scrape_covid(): |
|
|
|
""" |
|
|
|
This function scrapes the total number of covid cases from a csv file on the internet. |
|
|
|
""" |
|
|
|
url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv' |
|
|
|
df = pd.read_csv(url, usecols=['location', 'date', 'total_cases']) |
|
|
|
return df[df.location == 'World'].set_index('date').total_cases |
|
|
|
def scrape_yahoo(slug): |
|
|
|
""" |
|
|
|
Downloads historical stock price data from Yahoo Finance. |
|
|
|
|
|
|
|
:param str slug: The ticker symbol of the desired security. It is inserted verbatim into the |



Yahoo Finance download URL (the `yfinance` package is not used here). |
|
|
|
:returns pd.Series(float): A pandas Series with timestamps as indices and closing prices (the 'Close' column) as values, |
|
|
|
sorted by timestamp in ascending order. |
|
|
|
""" |
|
|
|
url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \ |
|
|
|
'?period1=1579651200&period2=9999999999&interval=1d&events=history' |
|
|
|
df = pd.read_csv(url, usecols=['Date', 'Close']) |
|
|
@ -63,6 +99,14 @@ def update_confirmed_cases(): |
|
|
|
return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']) |
|
|
|
|
|
|
|
def wrangle_data(covid, bitcoin, gold, dow): |
|
|
|
""" |
|
|
|
This function joins the Dow Jones, Gold and Bitcoin dataframes into a single one. |
|
|
|
It then sorts them by date and interpolates missing values. It |
|
|
|
discards rows before '2020-02-23'. |
|
|
|
Finally it calculates percentages relative to day 1 of each series (Dow Jones, Gold, Bitcoin) |
|
|
|
and adds a column |
|
|
|
with covid cases. The result is returned as a new dataframe whose columns are sorted by their value on the last day. |
|
|
|
""" |
|
|
|
df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates. |
|
|
|
df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s. |
|
|
|
yesterday = str(datetime.date.today() - datetime.timedelta(1)) |
|
|
@ -72,6 +116,11 @@ def update_confirmed_cases(): |
|
|
|
return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value. |
|
|
|
|
|
|
|
def get_figure(df): |
|
|
|
""" |
|
|
|
This function returns a plotly figure built from the columns of `df`: worldwide total COVID-19 cases and the economic |



indicators (Bitcoin, Gold, Dow Jones). 'Total Cases' is assigned to axis 'y1' and every other column to 'y2'. The data comes from |



Our World in Data and Yahoo Finance. |
|
|
|
""" |
|
|
|
figure = go.Figure() |
|
|
|
for col_name in reversed(df.columns): |
|
|
|
yaxis = 'y1' if col_name == 'Total Cases' else 'y2' |
|
|
@ -97,6 +146,27 @@ def update_confirmed_cases(): |
|
|
|
# |
|
|
|
|
|
|
|
def update_file(filename, figure): |
|
|
|
""" |
|
|
|
Updates the file at `filename` with the plotly figure `figure`. |
|
|
|
|
|
|
|
:param filename: Path of the existing file to update (the nested main() passes 'covid_cases.js'). |



:type filename: |



str, required. |



:param figure: The plotly figure; it is serialized with `figure.to_json(pretty=True)`. |
|
|
|
|
|
|
|
The new content is the first six lines of the existing file followed by the figure's |



pretty-printed JSON (with continuation lines indented) and two closing lines, ')' and '};'. |
|
|
|
""" |
|
|
|
lines = read_file(filename) |
|
|
|
f_json = figure.to_json(pretty=True).replace('\n', '\n ') |
|
|
|
out = lines[:6] + [f' {f_json}\n', ' )\n', '};\n'] |
|
|
|
xxxxxxxxxx