#!/usr/bin/env python3 # # Usage: ./update_plots.py # Updates plots from the Plotly section so they show the latest data. from pathlib import Path import datetime import pandas as pd from plotly.express import line import plotly.graph_objects as go import re def main(): """ This function scrapes the data from the web and wrangles it into a pandas DataFrame. It then creates an interactive plotly line graph of covid cases. """ print('Updating covid deaths...') update_covid_deaths() print('Updating covid cases...') update_confirmed_cases() def update_covid_deaths(): """ Update the plot of global COVID-19 deaths over time. :param df: A pandas DataFrame with columns 'Continent', 'Date', and 'Total Deaths per Million'. """ covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', usecols=['iso_code', 'date', 'total_deaths', 'population']) continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff' '846ea5d35e5fc47f26c/raw/country-and-continent-codes-list-csv.csv', usecols=['Three_Letter_Country_Code', 'Continent_Name']) df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code') df = df.groupby(['Continent_Name', 'date']).sum().reset_index() df['Total Deaths per Million'] = round(df.total_deaths * 1e6 / df.population) today = str(datetime.date.today()) df = df[('2020-02-22' < df.date) & (df.date < today)] df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns') gb = df.groupby('Continent') df['Max Total Deaths'] = gb[['Total Deaths per Million']].transform('max') df = df.sort_values(['Max Total Deaths', 'Date'], ascending=[False, True]) f = line(df, x='Date', y='Total Deaths per Million', color='Continent') f.update_layout(margin=dict(t=24, b=0), paper_bgcolor='rgba(0, 0, 0, 0)') update_file('covid_deaths.js', f) f.layout.paper_bgcolor = 'rgb(255, 255, 255)' write_to_png_file('covid_deaths.png', f, width=960, height=340) def update_confirmed_cases(): """ Update the file covid_cases.js with a plot of total cases, gold price, bitcoin price and Dow Jones index. """ def main(): """ This function scrapes the data from the web and wrangles it into a pandas DataFrame. It then creates an interactive plotly line graph of covid cases in New York State. """ df = wrangle_data(*scrape_data()) f = get_figure(df) update_file('covid_cases.js', f) f.layout.paper_bgcolor = 'rgb(255, 255, 255)' write_to_png_file('covid_cases.png', f, width=960, height=315) def scrape_data(): """ This function scrapes data from the following sources: 1. Our World in Data (Total Cases) 2. Yahoo Finance (Bitcoin, Gold, Dow Jones) The function returns a list of pandas Series objects containing the scraped data. """ def scrape_covid(): """ This function scrapes the total number of covid cases from a csv file on the internet. """ url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv' df = pd.read_csv(url, usecols=['location', 'date', 'total_cases']) return df[df.location == 'World'].set_index('date').total_cases def scrape_yahoo(slug): """ Downloads historical stock price data from Yahoo Finance. :param str slug: The ticker symbol of the desired security. Expected to be a valid argument for the `yfinance` function `Ticker()`. :returns pd.Series(float): A pandas Series with timestamps as indices and adjusted closing prices as values, sorted by timestamp in ascending order. """ url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \ '?period1=1579651200&period2=9999999999&interval=1d&events=history' df = pd.read_csv(url, usecols=['Date', 'Close']) return df.set_index('Date').Close out = [scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'), scrape_yahoo('^DJI')] return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']) def wrangle_data(covid, bitcoin, gold, dow): """ This function joins the Dow Jones, Gold and Bitcoin dataframes into a single one. It then sorts them by date and interpolates missing values. It discards rows before '2020-02-23'. Finally it calculates percentages relative to day 1 of each series (Dow Jones, Gold, Bitcoin) and adds a column with covid cases. The result is returned as a new dataframe sorted by date in descending order. """ df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates. df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s. yesterday = str(datetime.date.today() - datetime.timedelta(1)) df = df.loc['2020-02-23':yesterday] # Discards rows before '2020-02-23'. df = round((df / df.iloc[0]) * 100, 2) # Calculates percentages relative to day 1 df = df.join(covid) # Adds column with covid cases. return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value. def get_figure(df): """ This function returns a plotly figure that shows the total cases of COVID-19 in the US and its economic indicators. The data is taken from [The New York Times](#) and retrieved using [NYT API](#). """ figure = go.Figure() for col_name in reversed(df.columns): yaxis = 'y1' if col_name == 'Total Cases' else 'y2' colors = {'Total Cases': '#EF553B', 'Bitcoin': '#636efa', 'Gold': '#FFA15A', 'Dow Jones': '#00cc96'} trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis, line=dict(color=colors[col_name])) figure.add_trace(trace) figure.update_layout( yaxis1=dict(title='Total Cases', rangemode='tozero'), yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'), legend=dict(x=1.1), margin=dict(t=24, b=0), paper_bgcolor='rgba(0, 0, 0, 0)' ) return figure main() ### ## UTIL # def update_file(filename, figure): """ Updates the file at `filename` with the plotly figure `figure`. :param filename: The path to a JSON file containing a Plotly figure. :type filename: str, required. The extension of the file must be .json or .js (for legacy reasons). Note that if you are using JupyterLab and want to open your updated HTML files in an external browser window then you should save your notebook as an HTML file instead of as a Jupyter notebook. For more information see this guide on [using Jupyter with Google Colab](http://jupyter- notebook.readthedocs.io/en/stable/examples/Notebook/Running%20Code.html#Running-code). If you are not using JupyterLab then it is recommended that you use .html for all types of notebooks so that they can be opened in any web browser, including Chrome, Firefox and Edge on Windows and macOS without any extra configuration needed (see below for more details). This is because some browsers do not support JavaScript which is used by default by Plotly's exporting functions to generate """ lines = read_file(filename) f_json = figure.to_json(pretty=True).replace('\n', '\n ') out = lines[:6] + [f' {f_json}\n', ' )\n', '};\n'] write_to_file(filename, out) def read_file(filename): p = Path(__file__).resolve().parent / filename with open(p, encoding='utf-8') as file: return file.readlines() def write_to_file(filename, lines): p = Path(__file__).resolve().parent / filename with open(p, 'w', encoding='utf-8') as file: file.writelines(lines) def write_to_png_file(filename, figure, width, height): p = Path(__file__).resolve().parent / filename figure.write_image(str(p), width=width, height=height) if __name__ == '__main__': main()