You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

124 lines
5.1 KiB

4 years ago
4 years ago
  1. #!/usr/bin/env python3
  2. #
  3. # Usage: ./update_plots.py
  4. # Updates plots from the Plotly section so they show the latest data.
  5. from pathlib import Path
  6. import datetime
  7. import pandas as pd
  8. from plotly.express import line
  9. import plotly.graph_objects as go
  10. import re
  11. def main():
  12. print('Updating covid deaths...')
  13. update_covid_deaths()
  14. print('Updating covid cases...')
  15. update_confirmed_cases()
  16. def update_covid_deaths():
  17. covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
  18. usecols=['iso_code', 'date', 'total_deaths', 'population'])
  19. continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff'
  20. '846ea5d35e5fc47f26c/raw/country-and-continent-codes-list-csv.csv',
  21. usecols=['Three_Letter_Country_Code', 'Continent_Name'])
  22. df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
  23. df = df.groupby(['Continent_Name', 'date']).sum().reset_index()
  24. df['Total Deaths per Million'] = round(df.total_deaths * 1e6 / df.population)
  25. today = str(datetime.date.today())
  26. df = df[('2020-02-22' < df.date) & (df.date < today)]
  27. df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
  28. gb = df.groupby('Continent')
  29. df['Max Total Deaths'] = gb[['Total Deaths per Million']].transform('max')
  30. df = df.sort_values(['Max Total Deaths', 'Date'], ascending=[False, True])
  31. f = line(df, x='Date', y='Total Deaths per Million', color='Continent')
  32. f.update_layout(margin=dict(t=24, b=0), paper_bgcolor='rgba(0, 0, 0, 0)')
  33. update_file('covid_deaths.js', f)
  34. f.layout.paper_bgcolor = 'rgb(255, 255, 255)'
  35. write_to_png_file('covid_deaths.png', f, width=960, height=340)
  36. def update_confirmed_cases():
  37. def main():
  38. df = wrangle_data(*scrape_data())
  39. f = get_figure(df)
  40. update_file('covid_cases.js', f)
  41. f.layout.paper_bgcolor = 'rgb(255, 255, 255)'
  42. write_to_png_file('covid_cases.png', f, width=960, height=315)
  43. def scrape_data():
  44. def scrape_covid():
  45. url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
  46. df = pd.read_csv(url, usecols=['location', 'date', 'total_cases'])
  47. return df[df.location == 'World'].set_index('date').total_cases
  48. def scrape_yahoo(slug):
  49. url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \
  50. '?period1=1579651200&period2=9999999999&interval=1d&events=history'
  51. df = pd.read_csv(url, usecols=['Date', 'Close'])
  52. return df.set_index('Date').Close
  53. out = [scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'),
  54. scrape_yahoo('^DJI')]
  55. return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'])
  56. def wrangle_data(covid, bitcoin, gold, dow):
  57. df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates.
  58. df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s.
  59. yesterday = str(datetime.date.today() - datetime.timedelta(1))
  60. df = df.loc['2020-02-23':yesterday] # Discards rows before '2020-02-23'.
  61. df = round((df / df.iloc[0]) * 100, 2) # Calculates percentages relative to day 1
  62. df = df.join(covid) # Adds column with covid cases.
  63. return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value.
  64. def get_figure(df):
  65. figure = go.Figure()
  66. for col_name in reversed(df.columns):
  67. yaxis = 'y1' if col_name == 'Total Cases' else 'y2'
  68. colors = {'Total Cases': '#EF553B', 'Bitcoin': '#636efa', 'Gold': '#FFA15A',
  69. 'Dow Jones': '#00cc96'}
  70. trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis,
  71. line=dict(color=colors[col_name]))
  72. figure.add_trace(trace)
  73. figure.update_layout(
  74. yaxis1=dict(title='Total Cases', rangemode='tozero'),
  75. yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'),
  76. legend=dict(x=1.1),
  77. margin=dict(t=24, b=0),
  78. paper_bgcolor='rgba(0, 0, 0, 0)'
  79. )
  80. return figure
  81. main()
  82. ###
  83. ## UTIL
  84. #
  85. def update_file(filename, figure):
  86. lines = read_file(filename)
  87. f_json = figure.to_json(pretty=True).replace('\n', '\n ')
  88. out = lines[:6] + [f' {f_json}\n', ' )\n', '};\n']
  89. write_to_file(filename, out)
  90. def read_file(filename):
  91. p = Path(__file__).resolve().parent / filename
  92. with open(p, encoding='utf-8') as file:
  93. return file.readlines()
  94. def write_to_file(filename, lines):
  95. p = Path(__file__).resolve().parent / filename
  96. with open(p, 'w', encoding='utf-8') as file:
  97. file.writelines(lines)
  98. def write_to_png_file(filename, figure, width, height):
  99. p = Path(__file__).resolve().parent / filename
  100. figure.write_image(str(p), width=width, height=height)
  101. if __name__ == '__main__':
  102. main()