From 09759557874bdec4adc6cdbef3860734a47834ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Thu, 17 Oct 2024 13:35:27 +0200 Subject: [PATCH] Big changes in Pandas and Plotly --- README.md | 99 +++++++++++++++++++-------------------- index.html | 104 ++++++++++++++++++++--------------------- parse.js | 1 + pdf/index_for_pdf.html | 16 +++---- 4 files changed, 110 insertions(+), 110 deletions(-) diff --git a/README.md b/README.md index 723e817..1386b36 100644 --- a/README.md +++ b/README.md @@ -3155,7 +3155,7 @@ import pandas as pd, matplotlib.pyplot as plt **Ordered dictionary with a name.** ```python ->>> pd.Series([1, 2], index=['x', 'y'], name='a') +>>> sr = pd.Series([1, 2], index=['x', 'y'], name='a'); sr x 1 y 2 Name: a, dtype: int64 @@ -3203,7 +3203,7 @@ plt.show() # Displays the plot. Also plt.sav ``` ```python ->>> sr = pd.Series([2, 3], index=['x', 'y']) +>>> sr = pd.Series([2, 3], index=['x', 'y']); sr x 2 y 3 ``` @@ -3234,7 +3234,7 @@ y 3 **Table with labeled rows and columns.** ```python ->>> pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']) +>>> l = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); l x y a 1 2 b 3 4 @@ -3270,13 +3270,14 @@ b 3 4 = .sort_values(col_key/s) # Sorts rows by passed column/s. Also `axis=1`. ``` +```python +.plot.line/area/bar/scatter(x=col_key, …) # `y=col_key/s`. Also hist/box(by=col_key). +plt.show() # Displays the plot. Also plt.savefig(). +``` + #### DataFrame — Merge, Join, Concat: ```python ->>> l = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']) - x y -a 1 2 -b 3 4 ->>> r = pd.DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z']) +>>> r = pd.DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z']); r y z b 4 5 c 6 7 @@ -3323,7 +3324,7 @@ c 6 7 * **All operations operate on columns by default. 
Pass `'axis=1'` to process the rows instead.** ```python ->>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']) +>>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); df x y a 1 2 b 3 4 @@ -3350,15 +3351,11 @@ b 3 4 ``` * **Use `'[col_key_1, col_key_2][row_key]'` to get the fifth result's values.** -#### DataFrame — Plot, Encode, Decode: -```python -.plot.line/area/bar/scatter(x=col_key, …) # `y=col_key/s`. Also hist/box(by=col_key). -plt.show() # Displays the plot. Also plt.savefig(). -``` +#### DataFrame — Encode, Decode: ```python = pd.read_json/html('') # Run `$ pip3 install beautifulsoup4 lxml`. - = pd.read_csv('') # `header/index_col/dtype/parse_dates/…=`. + = pd.read_csv('') # `header/index_col/dtype/usecols/…=`. = pd.read_pickle/excel('') # Use `sheet_name=None` to get all Excel sheets. = pd.read_sql('', ) # SQLite3/SQLAlchemy connection (see #SQLite). ``` @@ -3369,23 +3366,29 @@ plt.show() # Displays the plot. Also plt.sav .to_pickle/excel() # Run `$ pip3 install "pandas[excel]" odfpy`. .to_sql('', ) # Also `if_exists='fail/replace/append'`. ``` +* **Read\_csv() only parses dates of columns that were specified by 'parse\_dates' argument. It automatically tries to detect the format, but it can be helped with 'date\_format' or 'datefirst' arguments. 
Both dates and datetimes get stored as pd.Timestamp objects.** +* **If there's a single invalid date then it returns the whole column as a series of strings, unlike `' = pd.to_datetime(, errors="coerce")'`, which uses pd.NaT.** +* **To get specific attributes from a series of Timestamps use `'.dt.year/date/…'`.** ### GroupBy **Object that groups together rows of a dataframe based on the value of the passed column.** ```python >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz')) ->>> df.groupby('z').get_group(6) +>>> gb = df.groupby('z'); gb.apply(print) + x y z +a 1 2 3 x y z b 4 5 6 c 7 8 6 ``` ```python - = .groupby(column_key/s) # Splits DF into groups based on passed column. + = .groupby(col_key/s) # Splits DF into groups based on passed column. = .apply() # Maps each group. Func can return DF, Sr or el. - = [column_key] # Single column GB. All operations return a Sr. + = .get_group() # Selects a group by grouping column's value. = .size() # A Sr of group sizes. Same keys as get_group(). + = [col_key] # Single column GB. All operations return a Sr. 
``` #### GroupBy — Aggregate, Transform, Map: @@ -3396,37 +3399,20 @@ c 7 8 6 ``` ```python ->>> gb = df.groupby('z'); gb.apply(print) - x y z -a 1 2 3 - x y z -b 4 5 6 -c 7 8 6 -``` - -```text -+-----------------+-------------+-------------+-------------+---------------+ -| | 'sum' | 'rank' | ['rank'] | {'x': 'rank'} | -+-----------------+-------------+-------------+-------------+---------------+ -| gb.agg(…) | x y | | x y | | -| | z | x y | rank rank | x | -| | 3 1 2 | a 1 1 | a 1 1 | a 1 | -| | 6 11 13 | b 1 1 | b 1 1 | b 1 | -| | | c 2 2 | c 2 2 | c 2 | -+-----------------+-------------+-------------+-------------+---------------+ -| gb.transform(…) | x y | x y | | | -| | a 1 2 | a 1 1 | | | -| | b 11 13 | b 1 1 | | | -| | c 11 13 | c 2 2 | | | -+-----------------+-------------+-------------+-------------+---------------+ +>>> gb.sum() + x y +z +3 1 2 +6 11 13 ``` +* **Result has a named index that creates column `'z'` instead of `'index'` on reset_index().** ### Rolling **Object for rolling window calculations.** ```python = .rolling(win_size) # Also: `min_periods=None, center=False`. - = [column_key/s] # Or: .column_key + = [col_key/s] # Or: .col_key = .mean/sum/max() # Or: .apply/agg() ``` @@ -3435,10 +3421,20 @@ Plotly ------ ```python # $ pip3 install pandas plotly kaleido -import pandas as pd, plotly.express as ex -
= ex.line(, x=, y=) # Or: ex.line(x=, y=) -
.update_layout(margin=dict(t=0, r=0, b=0, l=0), …) # `paper_bgcolor='rgb(0, 0, 0)'`. -
.write_html/json/image('') # Also
.show(). +import pandas as pd, plotly.express as px +``` + +```python + = px.line(, x=col_key, y=col_key) # Or: px.line(x=, y=) +.update_layout(margin=dict(t=0, r=0, b=0, l=0)) # Also `paper_bgcolor='rgb(0, 0, 0)'`. +.write_html/json/image('') # Also .show(). +``` + +```python + = px.area/bar/box(, x=col_key, y=col_key) # Also `color=col_key`. + = px.scatter(, x=col_key, y=col_key) # Also `color/size/symbol=col_key`. + = px.scatter_3d(, x=col_key, y=col_key, …) # `z=col_key`. Also color/size/symbol. + = px.histogram(, x=col_key [, nbins=]) # Number of bins depends on DF size. ``` #### Displays a line chart of total coronavirus deaths per million grouped by continent: @@ -3457,7 +3453,7 @@ df = df.groupby(['Continent_Name', 'date']).sum().reset_index() df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population df = df[df.date > '2020-03-14'] df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns') -ex.line(df, x='Date', y='Total Deaths per Million', color='Continent').show() +px.line(df, x='Date', y='Total Deaths per Million', color='Continent').show() ``` #### Displays a multi-axis line chart of total coronavirus cases and changes in prices of Bitcoin, Dow Jones and gold: @@ -3470,20 +3466,23 @@ import pandas as pd, plotly.graph_objects as go def main(): covid, bitcoin, gold, dow = scrape_data() - display_data(wrangle_data(covid, bitcoin, gold, dow)) + df = wrangle_data(covid, bitcoin, gold, dow) + display_data(df) def scrape_data(): def get_covid_cases(): url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv' df = pd.read_csv(url, usecols=['location', 'date', 'total_cases']) - return df[df.location == 'World'].set_index('date').total_cases + df = df[df.location == 'World'] + return df.set_index('date').total_cases def get_ticker(symbol): url = (f'https://query1.finance.yahoo.com/v7/finance/download/{symbol}?' 
'period1=1579651200&period2=9999999999&interval=1d&events=history') df = pd.read_csv(url, usecols=['Date', 'Close']) return df.set_index('Date').Close out = get_covid_cases(), get_ticker('BTC-USD'), get_ticker('GC=F'), get_ticker('^DJI') - return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']) + names = ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'] + return map(pd.Series.rename, out, names) def wrangle_data(covid, bitcoin, gold, dow): df = pd.concat([bitcoin, gold, dow], axis=1) # Creates table by joining columns on dates. diff --git a/index.html b/index.html index 4cc4645..850f4b9 100644 --- a/index.html +++ b/index.html @@ -54,7 +54,7 @@
- +
@@ -2571,7 +2571,7 @@ W, H, MAX_S = 50, 50< import pandas as pd, matplotlib.pyplot as plt -

Series

Ordered dictionary with a name.

>>> pd.Series([1, 2], index=['x', 'y'], name='a')
+

Series

Ordered dictionary with a name.

>>> sr = pd.Series([1, 2], index=['x', 'y'], name='a'); sr
 x    1
 y    2
 Name: a, dtype: int64
@@ -2605,7 +2605,7 @@ plt.show()                                     # Disp
 <Sr> = <Sr>.fillna(<el>)                       # Or: <Sr>.agg/transform/map(lambda <el>: <el>)
 
-
>>> sr = pd.Series([2, 3], index=['x', 'y'])
+
>>> sr = pd.Series([2, 3], index=['x', 'y']); sr
 x    2
 y    3
 
@@ -2630,7 +2630,7 @@ y 3
  • Methods ffill(), interpolate(), fillna() and dropna() accept 'inplace=True'.
  • Last result has a hierarchical index. Use '<Sr>[key_1, key_2]' to get its values.
  • -

    DataFrame

    Table with labeled rows and columns.

    >>> pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    +

    DataFrame

    Table with labeled rows and columns.

    >>> l = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); l
        x  y
     a  1  2
     b  3  4
    @@ -2657,11 +2657,10 @@ b  3  4
     <DF>    = <DF>.sort_index(ascending=True)      # Sorts rows by row keys. Use `axis=1` for cols.
     <DF>    = <DF>.sort_values(col_key/s)          # Sorts rows by passed column/s. Also `axis=1`.
     
    -

    DataFrame — Merge, Join, Concat:

    >>> l = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    -   x  y
    -a  1  2
    -b  3  4
    ->>> r = pd.DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z'])
    +
    <DF>.plot.line/area/bar/scatter(x=col_key, …)  # `y=col_key/s`. Also hist/box(by=col_key).
    +plt.show()                                     # Displays the plot. Also plt.savefig(<path>).
    +
    +

    DataFrame — Merge, Join, Concat:

    >>> r = pd.DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z']); r
        y  z
     b  4  5
     c  6  7
    @@ -2705,7 +2704,7 @@ c  6  7
     
    • All operations operate on columns by default. Pass 'axis=1' to process the rows instead.
    -
    >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    +
    >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); df
        x  y
     a  1  2
     b  3  4
    @@ -2730,72 +2729,70 @@ b  3  4
     
    • Use '<DF>[col_key_1, col_key_2][row_key]' to get the fifth result's values.
    -

    DataFrame — Plot, Encode, Decode:

    <DF>.plot.line/area/bar/scatter(x=col_key, …)  # `y=col_key/s`. Also hist/box(by=col_key).
    -plt.show()                                     # Displays the plot. Also plt.savefig(<path>).
    -
    - -
    <DF> = pd.read_json/html('<str/path/url>')     # Run `$ pip3 install beautifulsoup4 lxml`.
    -<DF> = pd.read_csv('<path/url>')               # `header/index_col/dtype/parse_dates/…=<obj>`.
    +

    DataFrame — Encode, Decode:

    <DF> = pd.read_json/html('<str/path/url>')     # Run `$ pip3 install beautifulsoup4 lxml`.
    +<DF> = pd.read_csv('<path/url>')               # `header/index_col/dtype/usecols/…=<obj>`.
     <DF> = pd.read_pickle/excel('<path/url>')      # Use `sheet_name=None` to get all Excel sheets.
     <DF> = pd.read_sql('<table/query>', <conn.>)   # SQLite3/SQLAlchemy connection (see #SQLite).
    -
    +
    +
    <dict> = <DF>.to_dict('d/l/s/…')               # Returns columns as dicts, lists or series.
     <str>  = <DF>.to_json/html/csv/latex()         # Saves output to file if path is passed.
     <DF>.to_pickle/excel(<path>)                   # Run `$ pip3 install "pandas[excel]" odfpy`.
     <DF>.to_sql('<table_name>', <connection>)      # Also `if_exists='fail/replace/append'`.
     
    +
      +
    • Read_csv() only parses dates of columns that were specified by the 'parse_dates' argument. It automatically tries to detect the format, but it can be helped with 'date_format' or 'dayfirst' arguments. Both dates and datetimes get stored as pd.Timestamp objects.
    • +
    • If there's a single invalid date then it returns the whole column as a series of strings, unlike '<Sr> = pd.to_datetime(<Sr>, errors="coerce")', which uses pd.NaT.
    • +
    • To get specific attributes from a series of Timestamps use '<Sr>.dt.year/date/…'.
    • +

    GroupBy

    Object that groups together rows of a dataframe based on the value of the passed column.

    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz'))
    ->>> df.groupby('z').get_group(6)
    +>>> gb = df.groupby('z'); gb.apply(print)
    +   x  y  z
    +a  1  2  3
        x  y  z
     b  4  5  6
    -c  7  8  6
    -
    +c  7  8  6
    -
    <GB> = <DF>.groupby(column_key/s)              # Splits DF into groups based on passed column.
    +
    <GB> = <DF>.groupby(col_key/s)                 # Splits DF into groups based on passed column.
     <DF> = <GB>.apply(<func>)                      # Maps each group. Func can return DF, Sr or el.
    -<GB> = <GB>[column_key]                        # Single column GB. All operations return a Sr.
    +<DF> = <GB>.get_group(<num>)                   # Selects a group by grouping column's value.
     <Sr> = <GB>.size()                             # A Sr of group sizes. Same keys as get_group().
    +<GB> = <GB>[col_key]                           # Single column GB. All operations return a Sr.
     

    GroupBy — Aggregate, Transform, Map:

    <DF> = <GB>.sum/max/mean/idxmax/all()          # Or: <GB>.agg(lambda <Sr>: <el>)
     <DF> = <GB>.rank/diff/cumsum/ffill()           # Or: <GB>.transform(lambda <Sr>: <Sr>)
     <DF> = <GB>.fillna(<el>)                       # Or: <GB>.transform(lambda <Sr>: <Sr>)
     
    -
    >>> gb = df.groupby('z'); gb.apply(print)
    -   x  y  z
    -a  1  2  3
    -   x  y  z
    -b  4  5  6
    -c  7  8  6
    -
    ┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
    -┃                 │    'sum''rank'   │   ['rank']  │ {'x': 'rank'} ┃
    -┠─────────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
    -┃ gb.agg(…)       │      x   y  │             │      x    y │               ┃
    -┃                 │  z          │      x  y   │   rank rank │        x      ┃
    -┃                 │  3   1   2  │   a  1  1   │ a    1    1 │     a  1      ┃
    -┃                 │  6  11  13  │   b  1  1   │ b    1    1 │     b  1      ┃
    -┃                 │             │   c  2  2   │ c    2    2 │     c  2      ┃
    -┠─────────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
    -┃ gb.transform(…) │      x   y  │      x  y   │             │               ┃
    -┃                 │  a   1   2  │   a  1  1   │             │               ┃
    -┃                 │  b  11  13  │   b  1  1   │             │               ┃
    -┃                 │  c  11  13  │   c  2  2   │             │               ┃
    -┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
    +
    >>> gb.sum()
    +    x   y
    +z
    +3   1   2
    +6  11  13
     
    +
      +
    • Result has a named index that creates column 'z' instead of 'index' on reset_index().
    • +

    Rolling

    Object for rolling window calculations.

    <RSr/RDF/RGB> = <Sr/DF/GB>.rolling(win_size)   # Also: `min_periods=None, center=False`.
    -<RSr/RDF/RGB> = <RDF/RGB>[column_key/s]        # Or: <RDF/RGB>.column_key
    +<RSr/RDF/RGB> = <RDF/RGB>[col_key/s]           # Or: <RDF/RGB>.col_key
     <Sr/DF>       = <R>.mean/sum/max()             # Or: <R>.apply/agg(<agg_func/str>)
     

    #Plotly

    # $ pip3 install pandas plotly kaleido
    -import pandas as pd, plotly.express as ex
    -<Figure> = ex.line(<DF>, x=<col_name>, y=<col_name>)        # Or: ex.line(x=<list>, y=<list>)
    -<Figure>.update_layout(margin=dict(t=0, r=0, b=0, l=0), …)  # `paper_bgcolor='rgb(0, 0, 0)'`.
    -<Figure>.write_html/json/image('<path>')                    # Also <Figure>.show().
    +import pandas as pd, plotly.express as px
     
    +
    <Fig> = px.line(<DF>, x=col_key, y=col_key)            # Or: px.line(x=<list>, y=<list>)
    +<Fig>.update_layout(margin=dict(t=0, r=0, b=0, l=0))   # Also `paper_bgcolor='rgb(0, 0, 0)'`.
    +<Fig>.write_html/json/image('<path>')                  # Also <Fig>.show().
    +
    +
    <Fig> = px.area/bar/box(<DF>, x=col_key, y=col_key)    # Also `color=col_key`.
    +<Fig> = px.scatter(<DF>, x=col_key, y=col_key)         # Also `color/size/symbol=col_key`.
    +<Fig> = px.scatter_3d(<DF>, x=col_key, y=col_key, …)   # `z=col_key`. Also color/size/symbol.
    +<Fig> = px.histogram(<DF>, x=col_key [, nbins=<int>])  # Number of bins depends on DF size.
    +

    Displays a line chart of total coronavirus deaths per million grouped by continent:

    covid = pd.read_csv('https://raw.githubusercontent.com/owid/covid-19-data/8dde8ca49b'
                         '6e648c17dd420b2726ca0779402651/public/data/owid-covid-data.csv',
                         usecols=['iso_code', 'date', 'total_deaths', 'population'])
    @@ -2806,7 +2803,7 @@ df = df.groupby(['Continent_Name', 'Total Deaths per Million'] = df.total_deaths * 1e6 / df.population
     df = df[df.date > '2020-03-14']
     df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
    -ex.line(df, x='Date', y='Total Deaths per Million', color='Continent').show()
    +px.line(df, x='Date', y='Total Deaths per Million', color='Continent').show()
     
    @@ -2815,20 +2812,23 @@ ex.line(df, x='Date', y=def main(): covid, bitcoin, gold, dow = scrape_data() - display_data(wrangle_data(covid, bitcoin, gold, dow)) + df = wrangle_data(covid, bitcoin, gold, dow) + display_data(df) def scrape_data(): def get_covid_cases(): url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv' df = pd.read_csv(url, usecols=['location', 'date', 'total_cases']) - return df[df.location == 'World'].set_index('date').total_cases + df = df[df.location == 'World'] + return df.set_index('date').total_cases def get_ticker(symbol): url = (f'https://query1.finance.yahoo.com/v7/finance/download/{symbol}?' 'period1=1579651200&period2=9999999999&interval=1d&events=history') df = pd.read_csv(url, usecols=['Date', 'Close']) return df.set_index('Date').Close out = get_covid_cases(), get_ticker('BTC-USD'), get_ticker('GC=F'), get_ticker('^DJI') - return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']) + names = ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'] + return map(pd.Series.rename, out, names) def wrangle_data(covid, bitcoin, gold, dow): df = pd.concat([bitcoin, gold, dow], axis=1) # Creates table by joining columns on dates. @@ -2926,7 +2926,7 @@ $ deactivate # Deactivates the active
    - +
    diff --git a/parse.js b/parse.js index f1839ac..37fe0d2 100755 --- a/parse.js +++ b/parse.js @@ -310,6 +310,7 @@ const MARIO = ' main()\n'; const GROUPBY = + '>>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list(\'abc\'), list(\'xyz\'))\n' + '>>> gb = df.groupby(\'z\'); gb.apply(print)\n' + ' x y z\n' + 'a 1 2 3\n' + diff --git a/pdf/index_for_pdf.html b/pdf/index_for_pdf.html index 6f8fef1..ef13e17 100644 --- a/pdf/index_for_pdf.html +++ b/pdf/index_for_pdf.html @@ -30,7 +30,7 @@ copy function, 15
    coroutine, 33
    counter, 2, 4, 12, 17
    -csv, 26, 34, 46, 47
    +csv, 26, 34, 46, 47
    curses module, 33, 34
    cython, 15, 49

    D

    @@ -43,11 +43,11 @@

    E

    enum module, 19-20
    enumerate function, 3
    -excel, 46
    +excel, 46
    exceptions, 20-21, 23, 32
    exit function, 21

    F

    -

    files, 22-29, 34, 46
    +

    files, 22-29, 34, 46
    filter function, 11
    flask library, 36
    floats, 4, 6, 7
    @@ -73,7 +73,7 @@ iterator, 3-4, 11, 17
    itertools module, 3, 8

    J

    -

    json, 25, 36, 46

    +

    json, 25, 36, 46

    L

    lambda, 11
    lists, 1-2, 4, 11, 18-19, 21
    @@ -82,7 +82,7 @@

    M

    main function, 1, 49
    match statement, 31
    -matplotlib library, 34, 44, 46
    +matplotlib library, 34, 44, 46
    map function, 11, 31
    math module, 7
    memoryviews, 29
    @@ -102,7 +102,7 @@ paths, 23-24, 34
    pickle module, 25
    pillow library, 39-40
    -plotting, 34, 44, 46, 47-48
    +plotting, 34, 44, 46, 47-48
    print function, 14, 22
    profiling, 36-37
    progress bar, 34
    @@ -119,14 +119,14 @@ requests library, 35, 36

    S

    scope, 10, 12, 20
    -scraping, 35, 43, 46, 47-48
    +scraping, 35, 43, 46, 47-48
    sequence, 4, 18-19
    sets, 2, 4, 11, 19, 21
    shell commands, 25
    sleep function, 34
    sortable, 1, 16
    splat operator, 10-11, 26
    -sql, 27, 46
    +sql, 27, 46
    statistics, 7, 37-38, 44-48
    strings, 4-7, 14
    struct module, 28-29