Browse Source

Merge 47c1116989 into d252dfebe9

pull/116/merge
Fluency 1 week ago
committed by GitHub
parent
commit
3c219cc4b9
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
4 changed files with 101 additions and 0 deletions
  1. 15
      pdf/create_index.py
  2. 12
      pdf/remove_links.py
  3. 20
      web/convert_table.py
  4. 54
      web/update_plots.py

15
pdf/create_index.py

@ -13,6 +13,13 @@ from collections import defaultdict
def main():
"""
This function reads the file index.html and extracts all headings from it.
It then creates a dictionary with the first letter of each heading as key,
and for each key, a dictionary containing all headings starting with that letter as value.
The second-level dictionaries map each heading's text to a list of the IDs
of the headings with that text.
"""
html = read_file('index.html')
doc = BeautifulSoup(''.join(html), 'html.parser')
hhh = defaultdict(lambda: defaultdict(list))
@ -26,6 +33,14 @@ def main():
def print_hhh(hhh):
"""
Prints a table of contents for the commands in the given dictionary.
The keys of `hhh` are letters and each letter is mapped to another dictionary
that maps command names to lists of links. The first link in each list is used as the heading for that command name, so it should be unique among all
commands (and ideally short). All other links should be unique among all lists but not necessarily short.
Letters are printed in sorted order, regardless of their order in `hhh`.
"""
letters = hhh.keys()
for letter in sorted(letters):
hh = hhh[letter]

12
pdf/remove_links.py

@ -32,6 +32,18 @@ MATCHES = {
def main():
"""
Reads the file `../index.html` and joins its lines into a single string for further processing.
This function takes no arguments; the path of the file is hard-coded in `index_path`.
NOTE(review): the replacements applied to the text are presumably the ones defined in `MATCHES` - confirm.
"""
index_path = Path('..', 'index.html')
lines = read_file(index_path)
out = ''.join(lines)

20
web/convert_table.py

@ -1,7 +1,20 @@
#!/usr/bin/env python3
def convert_table(lines):
"""
Convert a table from ASCII art to Unicode box drawing characters or vice versa.
:param lines: A list of strings representing the lines of the table.
:type lines: list(str)
"""
def from_ascii():
"""
Convert an ASCII table to a table drawn with Unicode box-drawing characters.
Takes no arguments; the rows are read from the `lines` argument of the enclosing `convert_table`.
:returns: A string containing the equivalent Unicode box-drawing table.
"""
out = []
first, header, third, *body, last = lines
first = first.translate(str.maketrans({'-': '', '+': ''}))
@ -18,6 +31,13 @@ def convert_table(lines):
out.append(f'┗{last[1:-1]}┛')
return '\n'.join(out)
def from_unicode():
"""
Convert a table drawn with Unicode box-drawing characters to ASCII.
Takes no arguments; the rows are read from the `lines` argument of the enclosing `convert_table`.
:returns str out: The same text with all the Unicode box-drawing characters replaced by ASCII ones.
"""
out = []
for line in lines:
line = line.translate(str.maketrans('┏┓┗┛┠┼┨┯┷━─┃│', '+++++++++--||'))

54
web/update_plots.py

@ -12,6 +12,10 @@ import re
def main():
"""
This function updates all plots, printing a progress message before each step.
The covid deaths plot is updated first (via `update_covid_deaths`), followed by the covid cases plot.
"""
print('Updating covid deaths...')
update_covid_deaths()
print('Updating covid cases...')
@ -19,6 +23,11 @@ def main():
def update_covid_deaths():
"""
Update the plot of global COVID-19 deaths over time.
Takes no arguments: it downloads the Our World in Data COVID-19 CSV (columns 'iso_code', 'date',
'total_deaths' and 'population') and a CSV of continents itself.
"""
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
usecols=['iso_code', 'date', 'total_deaths', 'population'])
continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff'
@ -41,7 +50,15 @@ def update_covid_deaths():
def update_confirmed_cases():
"""
Update the file covid_cases.js with a plot of total cases, gold price, bitcoin price and Dow Jones index.
"""
def main():
"""
This function scrapes the data from the web and wrangles it into a pandas DataFrame.
It then creates a plotly figure of worldwide covid cases and the economic indicators,
and writes it to covid_cases.js and covid_cases.png.
"""
df = wrangle_data(*scrape_data())
f = get_figure(df)
update_file('covid_cases.js', f)
@ -49,11 +66,29 @@ def update_confirmed_cases():
write_to_png_file('covid_cases.png', f, width=960, height=315)
def scrape_data():
"""
This function scrapes data from the following sources:
1. Our World in Data (Total Cases)
2. Yahoo Finance (Bitcoin, Gold, Dow Jones)
The function returns an iterable (a `map` object) of pandas Series objects containing the scraped data,
named 'Total Cases', 'Bitcoin', 'Gold' and 'Dow Jones', in that order.
"""
def scrape_covid():
"""
This function scrapes the total number of covid cases from a csv file on the internet.
"""
url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(url, usecols=['location', 'date', 'total_cases'])
return df[df.location == 'World'].set_index('date').total_cases
def scrape_yahoo(slug):
"""
Downloads historical daily price data from Yahoo Finance as CSV.
:param str slug: The Yahoo Finance symbol of the desired security. It is inserted verbatim into the download URL.
:returns pd.Series(float): A pandas Series of closing prices (the 'Close' column of the downloaded CSV).
NOTE(review): presumably indexed by the 'Date' column - confirm.
"""
url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \
'?period1=1579651200&period2=9999999999&interval=1d&events=history'
df = pd.read_csv(url, usecols=['Date', 'Close'])
@ -63,6 +98,14 @@ def update_confirmed_cases():
return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'])
def wrangle_data(covid, bitcoin, gold, dow):
"""
This function joins the Dow Jones, Gold and Bitcoin series into a single dataframe.
It then sorts it by date and interpolates missing values. It discards rows before '2020-02-23'.
Finally it calculates percentages relative to day 1 of each series (Dow Jones, Gold, Bitcoin)
and adds a column with covid cases.
The result is returned as a dataframe whose columns are sorted by their value on the last day.
"""
df = pd.concat([dow, gold, bitcoin], axis=1) # Joins columns on dates.
df = df.sort_index().interpolate() # Sorts by date and interpolates NaN-s.
yesterday = str(datetime.date.today() - datetime.timedelta(1))
@ -72,6 +115,11 @@ def update_confirmed_cases():
return df.sort_values(df.index[-1], axis=1) # Sorts columns by last day's value.
def get_figure(df):
"""
This function returns a plotly figure built from the columns of `df`.
The 'Total Cases' column is plotted on the 'y1' axis and all other columns (the economic indicators) on 'y2'.
"""
figure = go.Figure()
for col_name in reversed(df.columns):
yaxis = 'y1' if col_name == 'Total Cases' else 'y2'
@ -97,6 +145,12 @@ def update_confirmed_cases():
#
def update_file(filename, figure):
"""
Updates the file at `filename` with the plotly figure `figure`.
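:param filename: The path to the JavaScript file that embeds the figure's JSON (e.g. 'covid_cases.js').
Its first 6 lines are kept and the figure's JSON is placed after them.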
:param figure: The Plotly figure.
"""
lines = read_file(filename)
f_json = figure.to_json(pretty=True).replace('\n', '\n ')
out = lines[:6] + [f' {f_json}\n', ' )\n', '};\n']

Loading…
Cancel
Save