diff --git a/README.md b/README.md index e905c08..070fda5 100644 --- a/README.md +++ b/README.md @@ -1740,9 +1740,9 @@ shutil.rmtree() # Deletes the directory. ### Shell Commands ```python - = os.popen('') # Executes command in sh/cmd. Returns its stdout pipe. + = os.popen('') # Executes commands in sh/cmd. Returns combined stdout. = .read(size=-1) # Reads 'size' chars or until EOF. Also readline/s(). - = .close() # Closes the pipe. Returns None on success (returncode 0). + = .close() # Returns None if last command exited with returncode 0. ``` #### Sends '1 + 1' to the basic calculator and captures its output: @@ -3146,6 +3146,8 @@ if __name__ == '__main__': Pandas ------ +**Data analysis library. For examples see [Plotly](#displaysalinechartoftotalcoronavirusdeathspermilliongroupedbycontinent).** + ```python # $ pip3 install pandas matplotlib import pandas as pd, matplotlib.pyplot as plt @@ -3155,65 +3157,61 @@ import pandas as pd, matplotlib.pyplot as plt **Ordered dictionary with a name.** ```python ->>> sr = pd.Series([1, 2], index=['x', 'y'], name='a'); sr +>>> s = pd.Series([1, 2], index=['x', 'y'], name='a'); s x 1 y 2 Name: a, dtype: int64 ``` ```python - = pd.Series() # Assigns RangeIndex starting at 0. - = pd.Series() # Takes dictionary's keys for index. - = pd.Series(, index=) # Only keeps items with keys specified in index. + = pd.Series() # Assigns RangeIndex starting at 0. + = pd.Series() # Takes dictionary's keys for index. + = pd.Series(, index=) # Only keeps items with keys specified in index. ``` ```python - = .loc[key] # Or: .iloc[i] - = .loc[coll_of_keys] # Or: .iloc[coll_of_i] - = .loc[from_key : to_key_inc] # Or: .iloc[from_i : to_i_exc] + = .loc[key] # Or: .iloc[i] + = .loc[coll_of_keys] # Or: .iloc[coll_of_i] + = .loc[from_key : to_key_inc] # Or: .iloc[from_i : to_i_exc] ``` ```python - = [key/i] # Or: . - = [coll_of_keys/coll_of_i] # Or: [key/i : key/i] - = [bools] # Or: .loc/iloc[bools] + = [key/i] # Or: . 
+ = [coll_of_keys/coll_of_i] # Or: [key/i : key/i] + = [bools] # Or: .loc/iloc[bools] ``` ```python - = > # Returns a Series of bools. - = + # Items with non-matching keys get value NaN. + = > # Returns a Series of bools. + = + # Items with non-matching keys get value NaN. ``` ```python - = pd.concat() # Concats multiple series into one long Series. - = .combine_first() # Adds items that are not yet present. -.update() # Updates items that are already present. + = pd.concat() # Concats multiple series into one long Series. + = .combine_first() # Adds items that are not yet present. +.update() # Updates items that are already present. ``` ```python -.plot.line/area/bar/pie/hist() # Generates a Matplotlib plot. +.plot.line/area/bar/pie/hist() # Generates a Matplotlib plot. plt.show() # Displays the plot. Also plt.savefig(). ``` +* **Indexing objects can't be tuples because `'obj[x, y]'` is converted to `'obj[(x, y)]'`!** +* **Pandas uses NumPy types like `'np.int64'`. Series is converted to `'float64'` if we assign np.nan to any item. 
Use `'.astype()'` to get converted Series.** #### Series — Aggregate, Transform, Map: ```python - = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) - = .rank/diff/cumsum/ffill/interpo…() # Or: .agg/transform(lambda : ) - = .fillna() # Or: .agg/transform/map(lambda : ) -``` - -```python ->>> sr = pd.Series([2, 3], index=['x', 'y']); sr -x 2 -y 3 + = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) + = .rank/diff/cumsum/ffill/interpol…() # Or: .agg/transform(lambda : ) + = .isna/fillna/isin([]) # Or: .agg/transform/map(lambda : ) ``` ```text +---------------+-------------+-------------+---------------+ | | 'sum' | ['sum'] | {'s': 'sum'} | +---------------+-------------+-------------+---------------+ -| sr.apply(…) | 5 | sum 5 | s 5 | -| sr.agg(…) | | | | +| s.apply(…) | 3 | sum 3 | s 3 | +| s.agg(…) | | | | +---------------+-------------+-------------+---------------+ ``` @@ -3221,14 +3219,13 @@ y 3 +---------------+-------------+-------------+---------------+ | | 'rank' | ['rank'] | {'r': 'rank'} | +---------------+-------------+-------------+---------------+ -| sr.apply(…) | | rank | | -| sr.agg(…) | x 1 | x 1 | r x 1 | -| | y 2 | y 2 | y 2 | +| s.apply(…) | | rank | | +| s.agg(…) | x 1.0 | x 1.0 | r x 1.0 | +| | y 2.0 | y 2.0 | y 2.0 | +---------------+-------------+-------------+---------------+ ``` -* **Indexing objects can't be tuples because `'obj[x, y]'` is converted to `'obj[(x, y)]'`!** * **Methods ffill(), interpolate(), fillna() and dropna() accept `'inplace=True'`.** -* **Last result has a hierarchical index. Use `'[key_1, key_2]'` to get its values.** +* **Last result has a multi-index. Use `'[key_1, key_2]'` to get its values.** ### DataFrame **Table with labeled rows and columns.** @@ -3241,33 +3238,39 @@ b 3 4 ``` ```python - = pd.DataFrame() # Rows can be either lists, dicts or series. - = pd.DataFrame() # Columns can be either lists, dicts or series. + = pd.DataFrame() # Rows can be either lists, dicts or series. 
+ = pd.DataFrame() # Columns can be either lists, dicts or series. ``` ```python - = .loc[row_key, col_key] # Or: .iloc[row_i, col_i] - = .loc[row_key/s] # Or: .iloc[row_i/s] - = .loc[:, col_key/s] # Or: .iloc[:, col_i/s] - = .loc[row_bools, col_bools] # Or: .iloc[row_bools, col_bools] + = .loc[row_key, col_key] # Or: .iloc[row_i, col_i] + = .loc[row_key/s] # Or: .iloc[row_i/s] + = .loc[:, col_key/s] # Or: .iloc[:, col_i/s] + = .loc[row_bools, col_bools] # Or: .iloc[row_bools, col_bools] ``` ```python - = [col_key/s] # Or: . - = [row_bools] # Keeps rows as specified by bools. - = [] # Assigns NaN to items that are False in bools. + = [col_key/s] # Or: . + = [row_bools] # Keeps rows as specified by bools. + = [] # Assigns NaN to items that are False in bools. ``` ```python - = > # Returns DF of bools. Sr is treated as a row. - = + # Items with non-matching keys get value NaN. + = > # Returns DF of bools. S is treated as a row. + = + # Items with non-matching keys get value NaN. ``` ```python - = .set_index(col_key) # Replaces row keys with column's values. - = .reset_index(drop=False) # Drops or moves row keys to column named index. - = .sort_index(ascending=True) # Sorts rows by row keys. Use `axis=1` for cols. - = .sort_values(col_key/s) # Sorts rows by passed column/s. Also `axis=1`. + = .set_index(col_key) # Replaces row keys with column's values. + = .reset_index(drop=False) # Drops or moves row keys to column named index. + = .sort_index(ascending=True) # Sorts rows by row keys. Use `axis=1` for cols. + = .sort_values(col_key/s) # Sorts rows by passed column/s. Also `axis=1`. +``` + +```python + = .head/tail/sample() # Returns first, last, or random n elements. + = .describe() # Describes columns. Also shape, info(), corr(). + = .query('') # Filters rows with e.g. 'col_1 == val_1 and …'. ``` ```python @@ -3301,41 +3304,28 @@ c 6 7 | axis=0, | a 1 2 . | 2 | | Uses 'outer' by default. | | join=…) | b 3 4 . | 4 | | A Series is treated as a | | | b . 
4 5 | 4 | | column. To add a row use | -| | c . 6 7 | 6 | | pd.concat([l, DF([sr])]).| +| | c . 6 7 | 6 | | pd.concat([l, DF([s])]). | +------------------------+---------------+------------+------------+--------------------------+ | pd.concat([l, r], | x y y z | | | Adds columns at the | | axis=1, | a 1 2 . . | x y y z | | right end. Uses 'outer' | | join=…) | b 3 4 4 5 | 3 4 4 5 | | by default. A Series is | | | c . . 6 7 | | | treated as a column. | +------------------------+---------------+------------+------------+--------------------------+ -| l.combine_first(r) | x y z | | | Adds missing rows and | -| | a 1 2 . | | | columns. Also updates | -| | b 3 4 5 | | | items that contain NaN. | -| | c . 6 7 | | | Argument r must be a DF. | -+------------------------+---------------+------------+------------+--------------------------+ ``` #### DataFrame — Aggregate, Transform, Map: ```python - = .sum/max/mean/idxmax/all() # Or: .apply/agg(lambda : ) - = .rank/diff/cumsum/ffill/interpo…() # Or: .apply/agg/transfo…(lambda : ) - = .fillna() # Or: .applymap(lambda : ) -``` -* **All operations operate on columns by default. 
Pass `'axis=1'` to process the rows instead.** - -```python ->>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); df - x y -a 1 2 -b 3 4 + = .sum/max/mean/idxmax/all() # Or: .apply/agg(lambda : ) + = .rank/diff/cumsum/ffill/interpo…() # Or: .apply/agg/transform(lambda : ) + = .isna/fillna/isin([]) # Or: .agg/transform/map(lambda : ) ``` ```text +-----------------+-------------+-------------+---------------+ | | 'sum' | ['sum'] | {'x': 'sum'} | +-----------------+-------------+-------------+---------------+ -| df.apply(…) | x 4 | x y | x 4 | -| df.agg(…) | y 6 | sum 4 6 | | +| l.apply(…) | x 4 | x y | x 4 | +| l.agg(…) | y 6 | sum 4 6 | | +-----------------+-------------+-------------+---------------+ ``` @@ -3343,16 +3333,25 @@ b 3 4 +-----------------+-------------+-------------+---------------+ | | 'rank' | ['rank'] | {'x': 'rank'} | +-----------------+-------------+-------------+---------------+ -| df.apply(…) | | x y | | -| df.agg(…) | x y | rank rank | x | -| df.transform(…) | a 1 1 | a 1 1 | a 1 | -| | b 2 2 | b 2 2 | b 2 | +| l.apply(…) | | x y | | +| l.agg(…) | x y | rank rank | x | +| l.transform(…) | a 1.0 1.0 | a 1.0 1.0 | a 1.0 | +| | b 2.0 2.0 | b 2.0 2.0 | b 2.0 | +-----------------+-------------+-------------+---------------+ ``` -* **Use `'[col_key_1, col_key_2][row_key]'` to get the fifth result's values.** +* **All methods operate on columns by default. Pass `'axis=1'` to process the rows instead.** +* **Fifth result's columns are indexed with a multi-index. This means we need a tuple of column keys to specify a single column: `'.loc[row_k, (col_k_1, col_k_2)]'`.** -#### DataFrame — Encode, Decode: +#### DataFrame — Multi-Index: +```python + = .xs(row_key, level=) # Rows with key on passed level of multi-index. + = .xs(row_keys, level=) # Rows that have first key on first level, etc. + = .set_index(col_keys) # Combines multiple columns into a multi-index. 
+ = .stack/unstack(level=-1) # Combines col keys with row keys or vice versa. + = .pivot_table(index=col_key/s, …) # `columns=col_key/s, values=col_key/s`. +``` +#### DataFrame — Encode, Decode: ```python = pd.read_json/html('') # Run `$ pip3 install beautifulsoup4 lxml`. = pd.read_csv('') # `header/index_col/dtype/usecols/…=`. @@ -3367,53 +3366,49 @@ b 3 4 .to_sql('', ) # Also `if_exists='fail/replace/append'`. ``` * **Read\_csv() only parses dates of columns that were specified by 'parse\_dates' argument. It automatically tries to detect the format, but it can be helped with 'date\_format' or 'datefirst' arguments. Both dates and datetimes get stored as pd.Timestamp objects.** -* **If there's a single invalid date then it returns the whole column as a series of strings, unlike `' = pd.to_datetime(, errors="coerce")'`, which uses pd.NaT.** -* **To get specific attributes from a series of Timestamps use `'.dt.year/date/…'`.** +* **If there's a single invalid date then it returns the whole column as a series of strings, unlike `' = pd.to_datetime(, errors="coerce")'`, which uses pd.NaT.** +* **To get specific attributes from a series of Timestamps use `'.dt.year/date/…'`.** ### GroupBy **Object that groups together rows of a dataframe based on the value of the passed column.** -```python ->>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz')) ->>> gb = df.groupby('z'); gb.apply(print) - x y z -a 1 2 3 - x y z -b 4 5 6 -c 7 8 6 -``` - ```python = .groupby(col_key/s) # Splits DF into groups based on passed column. - = .apply() # Maps each group. Func can return DF, Sr or el. + = .apply() # Maps each group. Func can return DF, S or el. = .get_group() # Selects a group by grouping column's value. - = .size() # A Sr of group sizes. Same keys as get_group(). - = [col_key] # Single column GB. All operations return a Sr. + = .size() # S of group sizes. Same keys as get_group(). + = [col_key] # Single column GB. All operations return S. 
``` -#### GroupBy — Aggregate, Transform, Map: ```python - = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) - = .rank/diff/cumsum/ffill() # Or: .transform(lambda : ) - = .fillna() # Or: .transform(lambda : ) + = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) + = .rank/diff/cumsum/ffill() # Or: .transform(lambda : ) + = .fillna() # Or: .transform(lambda : ) ``` +#### Divides rows into groups and sums their columns. Result has a named index that creates column `'z'` on reset_index(): ```python +>>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz')) +>>> gb = df.groupby('z'); gb.apply(print) + x y z +a 1 2 3 + x y z +b 4 5 6 +c 7 8 6 >>> gb.sum() x y z 3 1 2 6 11 13 ``` -* **Result has a named index that creates column `'z'` instead of `'index'` on reset_index().** ### Rolling **Object for rolling window calculations.** ```python - = .rolling(win_size) # Also: `min_periods=None, center=False`. - = [col_key/s] # Or: .col_key - = .mean/sum/max() # Or: .apply/agg() + = .rolling(win_size) # Also: `min_periods=None, center=False`. + = [col_key/s] # Or: .col_key + = .mean/sum/max() # Or: .apply/agg() ``` diff --git a/index.html b/index.html index a297efe..a584141 100644 --- a/index.html +++ b/index.html @@ -55,7 +55,7 @@
- +
@@ -1452,9 +1452,9 @@ shutil.rmtree(<path>) # Deletes t
  • Paths can be either strings, Paths, or DirEntry objects.
  • Functions report OS related errors by raising either OSError or one of its subclasses.
  • -

    Shell Commands

    <pipe> = os.popen('<command>')      # Executes command in sh/cmd. Returns its stdout pipe.
    +

    Shell Commands

    <pipe> = os.popen('<commands>')     # Executes commands in sh/cmd. Returns combined stdout.
     <str>  = <pipe>.read(size=-1)       # Reads 'size' chars or until EOF. Also readline/s().
    -<int>  = <pipe>.close()             # Closes the pipe. Returns None on success (returncode 0).
    +<int>  = <pipe>.close()             # Returns None if last command exited with returncode 0.
     

    Sends '1 + 1' to the basic calculator and captures its output:

    >>> subprocess.run('bc', input='1 + 1\n', capture_output=True, text=True)
    @@ -2568,68 +2568,68 @@ W, H, MAX_S = 50, 50<
         main()
     
    -

    #Pandas

    # $ pip3 install pandas matplotlib
    +

    #Pandas

    Data analysis library. For examples see Plotly.

    # $ pip3 install pandas matplotlib
     import pandas as pd, matplotlib.pyplot as plt
     
    -

    Series

    Ordered dictionary with a name.

    >>> sr = pd.Series([1, 2], index=['x', 'y'], name='a'); sr
    +
    +

    Series

    Ordered dictionary with a name.

    >>> s = pd.Series([1, 2], index=['x', 'y'], name='a'); s
     x    1
     y    2
     Name: a, dtype: int64
     
    -
    <Sr> = pd.Series(<list>)                       # Assigns RangeIndex starting at 0.
    -<Sr> = pd.Series(<dict>)                       # Takes dictionary's keys for index.
    -<Sr> = pd.Series(<dict/Series>, index=<list>)  # Only keeps items with keys specified in index.
    +
    <S>  = pd.Series(<list>)                       # Assigns RangeIndex starting at 0.
    +<S>  = pd.Series(<dict>)                       # Takes dictionary's keys for index.
    +<S>  = pd.Series(<dict/Series>, index=<list>)  # Only keeps items with keys specified in index.
     
    -
    <el> = <Sr>.loc[key]                           # Or: <Sr>.iloc[i]
    -<Sr> = <Sr>.loc[coll_of_keys]                  # Or: <Sr>.iloc[coll_of_i]
    -<Sr> = <Sr>.loc[from_key : to_key_inc]         # Or: <Sr>.iloc[from_i : to_i_exc]
    +
    <el> = <S>.loc[key]                            # Or: <S>.iloc[i]
    +<S>  = <S>.loc[coll_of_keys]                   # Or: <S>.iloc[coll_of_i]
    +<S>  = <S>.loc[from_key : to_key_inc]          # Or: <S>.iloc[from_i : to_i_exc]
     
    -
    <el> = <Sr>[key/i]                             # Or: <Sr>.<key>
    -<Sr> = <Sr>[coll_of_keys/coll_of_i]            # Or: <Sr>[key/i : key/i]
    -<Sr> = <Sr>[bools]                             # Or: <Sr>.loc/iloc[bools]
    +
    <el> = <S>[key/i]                              # Or: <S>.<key>
    +<S>  = <S>[coll_of_keys/coll_of_i]             # Or: <S>[key/i : key/i]
    +<S>  = <S>[bools]                              # Or: <S>.loc/iloc[bools]
     
    -
    <Sr> = <Sr> > <el/Sr>                          # Returns a Series of bools.
    -<Sr> = <Sr> + <el/Sr>                          # Items with non-matching keys get value NaN.
    +
    <S>  = <S> > <el/S>                            # Returns a Series of bools.
    +<S>  = <S> + <el/S>                            # Items with non-matching keys get value NaN.
     
    -
    <Sr> = pd.concat(<coll_of_Sr>)                 # Concats multiple series into one long Series.
    -<Sr> = <Sr>.combine_first(<Sr>)                # Adds items that are not yet present.
    -<Sr>.update(<Sr>)                              # Updates items that are already present.
    +
    <S> = pd.concat(<coll_of_S>)                   # Concats multiple series into one long Series.
    +<S> = <S>.combine_first(<S>)                   # Adds items that are not yet present.
    +<S>.update(<S>)                                # Updates items that are already present.
     
    -
    <Sr>.plot.line/area/bar/pie/hist()             # Generates a Matplotlib plot.
    +
    <S>.plot.line/area/bar/pie/hist()              # Generates a Matplotlib plot.
     plt.show()                                     # Displays the plot. Also plt.savefig(<path>).
     
    -

    Series — Aggregate, Transform, Map:

    <el> = <Sr>.sum/max/mean/idxmax/all()          # Or: <Sr>.agg(lambda <Sr>: <el>)
    -<Sr> = <Sr>.rank/diff/cumsum/ffill/interpo…()  # Or: <Sr>.agg/transform(lambda <Sr>: <Sr>)
    -<Sr> = <Sr>.fillna(<el>)                       # Or: <Sr>.agg/transform/map(lambda <el>: <el>)
    +
      +
    • Indexing objects can't be tuples because 'obj[x, y]' is converted to 'obj[(x, y)]'!
    • +
    • Pandas uses NumPy types like 'np.int64'. Series is converted to 'float64' if we assign np.nan to any item. Use '<S>.astype(<str/type>)' to get converted Series.
    • +
    +

    Series — Aggregate, Transform, Map:

    <el> = <S>.sum/max/mean/idxmax/all()           # Or: <S>.agg(lambda <S>: <el>)
    +<S>  = <S>.rank/diff/cumsum/ffill/interpol…()  # Or: <S>.agg/transform(lambda <S>: <S>)
    +<S>  = <S>.isna/fillna/isin([<el/coll>])       # Or: <S>.agg/transform/map(lambda <el>: <el>)
     
    -
    >>> sr = pd.Series([2, 3], index=['x', 'y']); sr
    -x    2
    -y    3
    -
    ┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
     ┃               │    'sum'    │   ['sum']   │ {'s': 'sum'}  ┃
     ┠───────────────┼─────────────┼─────────────┼───────────────┨
    -┃ sr.apply(…)   │      5      │    sum  5   │     s  5      ┃
    -┃ sr.agg(…)     │             │             │               ┃
    +┃ s.apply(…)    │      3      │    sum  3   │     s  3      ┃
    +┃ s.agg(…)      │             │             │               ┃
     ┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
     
     ┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
     ┃               │    'rank'   │   ['rank']  │ {'r': 'rank'} ┃
     ┠───────────────┼─────────────┼─────────────┼───────────────┨
    -┃ sr.apply(…)   │             │      rank   │               ┃
    -┃ sr.agg(…)     │     x  1    │   x     1   │    r  x  1    ┃
    -┃               │     y  2    │   y     2   │       y  2    ┃
    +┃ s.apply(…)    │             │      rank   │               ┃
    +┃ s.agg(…)      │    x  1.0   │   x   1.0   │   r  x  1.0   ┃
    +┃               │    y  2.0   │   y   2.0   │      y  2.0   ┃
     ┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
     
      -
    • Indexing objects can't be tuples because 'obj[x, y]' is converted to 'obj[(x, y)]'!
    • Methods ffill(), interpolate(), fillna() and dropna() accept 'inplace=True'.
    • -
    • Last result has a hierarchical index. Use '<Sr>[key_1, key_2]' to get its values.
    • +
    • Last result has a multi-index. Use '<S>[key_1, key_2]' to get its values.

    DataFrame

    Table with labeled rows and columns.

    >>> l = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); l
        x  y
    @@ -2638,25 +2638,29 @@ b  3  4
     
    -
    <DF>    = pd.DataFrame(<list_of_rows>)         # Rows can be either lists, dicts or series.
    -<DF>    = pd.DataFrame(<dict_of_columns>)      # Columns can be either lists, dicts or series.
    +
    <DF>   = pd.DataFrame(<list_of_rows>)          # Rows can be either lists, dicts or series.
    +<DF>   = pd.DataFrame(<dict_of_columns>)       # Columns can be either lists, dicts or series.
     
    -
    <el>    = <DF>.loc[row_key, col_key]           # Or: <DF>.iloc[row_i, col_i]
    -<Sr/DF> = <DF>.loc[row_key/s]                  # Or: <DF>.iloc[row_i/s]
    -<Sr/DF> = <DF>.loc[:, col_key/s]               # Or: <DF>.iloc[:, col_i/s]
    -<DF>    = <DF>.loc[row_bools, col_bools]       # Or: <DF>.iloc[row_bools, col_bools]
    +
    <el>   = <DF>.loc[row_key, col_key]            # Or: <DF>.iloc[row_i, col_i]
    +<S/DF> = <DF>.loc[row_key/s]                   # Or: <DF>.iloc[row_i/s]
    +<S/DF> = <DF>.loc[:, col_key/s]                # Or: <DF>.iloc[:, col_i/s]
    +<DF>   = <DF>.loc[row_bools, col_bools]        # Or: <DF>.iloc[row_bools, col_bools]
     
    -
    <Sr/DF> = <DF>[col_key/s]                      # Or: <DF>.<col_key>
    -<DF>    = <DF>[row_bools]                      # Keeps rows as specified by bools.
    -<DF>    = <DF>[<DF_of_bools>]                  # Assigns NaN to items that are False in bools.
    +
    <S/DF> = <DF>[col_key/s]                       # Or: <DF>.<col_key>
    +<DF>   = <DF>[row_bools]                       # Keeps rows as specified by bools.
    +<DF>   = <DF>[<DF_of_bools>]                   # Assigns NaN to items that are False in bools.
     
    -
    <DF>    = <DF> > <el/Sr/DF>                    # Returns DF of bools. Sr is treated as a row.
    -<DF>    = <DF> + <el/Sr/DF>                    # Items with non-matching keys get value NaN.
    +
    <DF>   = <DF> > <el/S/DF>                      # Returns DF of bools. S is treated as a row.
    +<DF>   = <DF> + <el/S/DF>                      # Items with non-matching keys get value NaN.
     
    -
    <DF>    = <DF>.set_index(col_key)              # Replaces row keys with column's values.
    -<DF>    = <DF>.reset_index(drop=False)         # Drops or moves row keys to column named index.
    -<DF>    = <DF>.sort_index(ascending=True)      # Sorts rows by row keys. Use `axis=1` for cols.
    -<DF>    = <DF>.sort_values(col_key/s)          # Sorts rows by passed column/s. Also `axis=1`.
    +
    <DF>   = <DF>.set_index(col_key)               # Replaces row keys with column's values.
    +<DF>   = <DF>.reset_index(drop=False)          # Drops or moves row keys to column named index.
    +<DF>   = <DF>.sort_index(ascending=True)       # Sorts rows by row keys. Use `axis=1` for cols.
    +<DF>   = <DF>.sort_values(col_key/s)           # Sorts rows by passed column/s. Also `axis=1`.
    +
    +
    <DF>   = <DF>.head/tail/sample(<int>)          # Returns first, last, or random n elements.
    +<DF>   = <DF>.describe()                       # Describes columns. Also shape, info(), corr().
    +<DF>   = <DF>.query('<query>')                 # Filters rows with e.g. 'col_1 == val_1 and …'.
     
    <DF>.plot.line/area/bar/scatter(x=col_key, …)  # `y=col_key/s`. Also hist/box(by=col_key).
     plt.show()                                     # Displays the plot. Also plt.savefig(<path>).
    @@ -2684,52 +2688,47 @@ c  6  7
     ┃           axis=0,      │ a  1   2   .  │     2      │            │ Uses 'outer' by default. ┃
     ┃           join=…)      │ b  3   4   .  │     4      │            │ A Series is treated as a ┃
     ┃                        │ b  .   4   54      │            │ column. To add a row use ┃
    -┃                        │ c  .   6   76      │            │ pd.concat([l, DF([sr])]).┃
    +┃                        │ c  .   6   76      │            │ pd.concat([l, DF([s])]). ┃
     ┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
     ┃ pd.concat([l, r],      │    x  y  y  z │            │            │ Adds columns at the      ┃
     ┃           axis=1,      │ a  1  2  .  . │ x  y  y  z │            │ right end. Uses 'outer'  ┃
     ┃           join=…)      │ b  3  4  4  53  4  4  5 │            │ by default. A Series is  ┃
     ┃                        │ c  .  .  6  7 │            │            │ treated as a column.     ┃
    -┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
    -┃ l.combine_first(r)     │    x   y   z  │            │            │ Adds missing rows and    ┃
    -┃                        │ a  1   2   .  │            │            │ columns. Also updates    ┃
    -┃                        │ b  3   4   5  │            │            │ items that contain NaN.  ┃
    -┃                        │ c  .   6   7  │            │            │ Argument r must be a DF. ┃
     ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛
     
    -

    DataFrame — Aggregate, Transform, Map:

    <Sr> = <DF>.sum/max/mean/idxmax/all()          # Or: <DF>.apply/agg(lambda <Sr>: <el>)
    -<DF> = <DF>.rank/diff/cumsum/ffill/interpo…()  # Or: <DF>.apply/agg/transfo…(lambda <Sr>: <Sr>)
    -<DF> = <DF>.fillna(<el>)                       # Or: <DF>.applymap(lambda <el>: <el>)
    +

    DataFrame — Aggregate, Transform, Map:

    <S>  = <DF>.sum/max/mean/idxmax/all()          # Or: <DF>.apply/agg(lambda <S>: <el>)
    +<DF> = <DF>.rank/diff/cumsum/ffill/interpo…()  # Or: <DF>.apply/agg/transform(lambda <S>: <S>)
    +<DF> = <DF>.isna/fillna/isin([<el/coll>])      # Or: <DF>.agg/transform/map(lambda <el>: <el>)
     
    -
      -
    • All operations operate on columns by default. Pass 'axis=1' to process the rows instead.
    • -
    -
    >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']); df
    -   x  y
    -a  1  2
    -b  3  4
    -
    ┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
     ┃                 │    'sum'    │   ['sum']   │ {'x': 'sum'}  ┃
     ┠─────────────────┼─────────────┼─────────────┼───────────────┨
    -┃ df.apply(…)     │     x  4    │       x  y  │     x  4      ┃
    -┃ df.agg(…)       │     y  6    │  sum  4  6  │               ┃
    +┃ l.apply(…)      │     x  4    │       x  y  │     x  4      ┃
    +┃ l.agg(…)        │     y  6    │  sum  4  6  │               ┃
     ┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
     
     ┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
     ┃                 │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃
     ┠─────────────────┼─────────────┼─────────────┼───────────────┨
    -┃ df.apply(…)     │             │      x    y │               ┃
    -┃ df.agg(…)       │      x  y   │   rank rank │        x      ┃
    -┃ df.transform(…) │   a  1  1   │ a    1    1 │     a  1      ┃
    -┃                 │   b  2  2   │ b    2    2 │     b  2      ┃
    +┃ l.apply(…)      │             │      x    y │               ┃
    +┃ l.agg(…)        │      x    y │   rank rank │         x     ┃
    +┃ l.transform(…)  │ a  1.0  1.0 │ a  1.0  1.0 │    a  1.0     ┃
    +┃                 │ b  2.0  2.0 │ b  2.0  2.0 │    b  2.0     ┃
     ┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
     
      -
    • Use '<DF>[col_key_1, col_key_2][row_key]' to get the fifth result's values.
    • +
    • All methods operate on columns by default. Pass 'axis=1' to process the rows instead.
    • +
    • Fifth result's columns are indexed with a multi-index. This means we need a tuple of column keys to specify a single column: '<DF>.loc[row_k, (col_k_1, col_k_2)]'.
    +

    DataFrame — Multi-Index:

    <DF>   = <DF>.xs(row_key, level=<int>)         # Rows with key on passed level of multi-index.
    +<DF>   = <DF>.xs(row_keys, level=<ints>)       # Rows that have first key on first level, etc.
    +<DF>   = <DF>.set_index(col_keys)              # Combines multiple columns into a multi-index.
    +<S/DF> = <DF>.stack/unstack(level=-1)          # Combines col keys with row keys or vice versa.
    +<DF>   = <DF>.pivot_table(index=col_key/s, …)  # `columns=col_key/s, values=col_key/s`.
    +
    +

    DataFrame — Encode, Decode:

    <DF> = pd.read_json/html('<str/path/url>')     # Run `$ pip3 install beautifulsoup4 lxml`.
     <DF> = pd.read_csv('<path/url>')               # `header/index_col/dtype/usecols/…=<obj>`.
     <DF> = pd.read_pickle/excel('<path/url>')      # Use `sheet_name=None` to get all Excel sheets.
    @@ -2743,41 +2742,37 @@ b  3  4
     
    • Read_csv() only parses dates of columns that were specified by 'parse_dates' argument. It automatically tries to detect the format, but it can be helped with 'date_format' or 'datefirst' arguments. Both dates and datetimes get stored as pd.Timestamp objects.
    • -
    • If there's a single invalid date then it returns the whole column as a series of strings, unlike '<Sr> = pd.to_datetime(<Sr>, errors="coerce")', which uses pd.NaT.
    • -
    • To get specific attributes from a series of Timestamps use '<Sr>.dt.year/date/…'.
    • +
    • If there's a single invalid date then it returns the whole column as a series of strings, unlike '<S> = pd.to_datetime(<S>, errors="coerce")', which uses pd.NaT.
    • +
    • To get specific attributes from a series of Timestamps use '<S>.dt.year/date/…'.
    -

    GroupBy

    Object that groups together rows of a dataframe based on the value of the passed column.

    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz'))
    +

    GroupBy

    Object that groups together rows of a dataframe based on the value of the passed column.

    <GB> = <DF>.groupby(col_key/s)                 # Splits DF into groups based on passed column.
    +<DF> = <GB>.apply(<func>)                      # Maps each group. Func can return DF, S or el.
    +<DF> = <GB>.get_group(<el>)                    # Selects a group by grouping column's value.
    +<S>  = <GB>.size()                             # S of group sizes. Same keys as get_group().
    +<GB> = <GB>[col_key]                           # Single column GB. All operations return S.
    +
    + + +
    <DF> = <GB>.sum/max/mean/idxmax/all()          # Or: <GB>.agg(lambda <S>: <el>)
    +<DF> = <GB>.rank/diff/cumsum/ffill()           # Or: <GB>.transform(lambda <S>: <S>)
    +<DF> = <GB>.fillna(<el>)                       # Or: <GB>.transform(lambda <S>: <S>)
    +
    +

    Divides rows into groups and sums their columns. Result has a named index that creates column 'z' on reset_index():

    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], list('abc'), list('xyz'))
     >>> gb = df.groupby('z'); gb.apply(print)
        x  y  z
     a  1  2  3
        x  y  z
     b  4  5  6
    -c  7  8  6
    - - -
    <GB> = <DF>.groupby(col_key/s)                 # Splits DF into groups based on passed column.
    -<DF> = <GB>.apply(<func>)                      # Maps each group. Func can return DF, Sr or el.
    -<DF> = <GB>.get_group(<el>)                    # Selects a group by grouping column's value.
    -<Sr> = <GB>.size()                             # A Sr of group sizes. Same keys as get_group().
    -<GB> = <GB>[col_key]                           # Single column GB. All operations return a Sr.
    -
    -

    GroupBy — Aggregate, Transform, Map:

    <DF> = <GB>.sum/max/mean/idxmax/all()          # Or: <GB>.agg(lambda <Sr>: <el>)
    <DF> = <GB>.rank/diff/cumsum/ffill()           # Or: <GB>.transform(lambda <Sr>: <Sr>)
    <DF> = <GB>.fillna(<el>)                       # Or: <GB>.transform(lambda <Sr>: <Sr>)

    >>> gb.sum()
        x   y
    z
    3   1   2
    6  11  13

    • Result has a named index that creates column 'z' instead of 'index' on reset_index().

    Rolling

    Object for rolling window calculations.

    <RS/RDF/RGB> = <S/DF/GB>.rolling(win_size)     # Also: `min_periods=None, center=False`.
    <RS/RDF/RGB> = <RDF/RGB>[col_key/s]            # Or: <RDF/RGB>.col_key
    <S/DF>       = <R>.mean/sum/max()              # Or: <R>.apply/agg(<agg_func/str>)
     
    @@ -2928,7 +2923,7 @@ $ deactivate # Deactivates the active
    - +
    diff --git a/parse.js b/parse.js index bb68554..51b7ee2 100755 --- a/parse.js +++ b/parse.js @@ -316,7 +316,13 @@ const GROUPBY = 'a 1 2 3\n' + ' x y z\n' + 'b 4 5 6\n' + - 'c 7 8 6'; + 'c 7 8 6\n' + + '>>> gb.sum()\n' + + ' x y\n' + + 'z\n' + + '3 1 2\n' + + '6 11 13'; + const CYTHON_1 = 'cdef <ctype> <var_name> = <obj>\n' + @@ -576,22 +582,22 @@ const DIAGRAM_12_B = '┗━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━┛\n'; const DIAGRAM_13_A = - '| sr.apply(…) | 5 | sum 5 | s 5 |'; + '| s.apply(…) | 3 | sum 3 | s 3 |'; const DIAGRAM_13_B = "┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'sum' │ ['sum'] │ {'s': 'sum'} ┃\n" + "┠───────────────┼─────────────┼─────────────┼───────────────┨\n" + - "┃ sr.apply(…) │ 5 │ sum 5 │ s 5 ┃\n" + - "┃ sr.agg(…) │ │ │ ┃\n" + + "┃ s.apply(…) │ 3 │ sum 3 │ s 3 ┃\n" + + "┃ s.agg(…) │ │ │ ┃\n" + "┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n" + "\n" + "┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'rank' │ ['rank'] │ {'r': 'rank'} ┃\n" + "┠───────────────┼─────────────┼─────────────┼───────────────┨\n" + - "┃ sr.apply(…) │ │ rank │ ┃\n" + - "┃ sr.agg(…) │ x 1 │ x 1 │ r x 1 ┃\n" + - "┃ │ y 2 │ y 2 │ y 2 ┃\n" + + "┃ s.apply(…) │ │ rank │ ┃\n" + + "┃ s.agg(…) │ x 1.0 │ x 1.0 │ r x 1.0 ┃\n" + + "┃ │ y 2.0 │ y 2.0 │ y 2.0 ┃\n" + "┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n"; const DIAGRAM_14_A = @@ -618,37 +624,32 @@ const DIAGRAM_15_B = "┃ axis=0, │ a 1 2 . │ 2 │ │ Uses 'outer' by default. ┃\n" + "┃ join=…) │ b 3 4 . │ 4 │ │ A Series is treated as a ┃\n" + "┃ │ b . 4 5 │ 4 │ │ column. To add a row use ┃\n" + - "┃ │ c . 6 7 │ 6 │ │ pd.concat([l, DF([sr])]).┃\n" + + "┃ │ c . 6 7 │ 6 │ │ pd.concat([l, DF([s])]). ┃\n" + "┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨\n" + "┃ pd.concat([l, r], │ x y y z │ │ │ Adds columns at the ┃\n" + "┃ axis=1, │ a 1 2 . . │ x y y z │ │ right end. 
Uses 'outer' ┃\n" + "┃ join=…) │ b 3 4 4 5 │ 3 4 4 5 │ │ by default. A Series is ┃\n" + "┃ │ c . . 6 7 │ │ │ treated as a column. ┃\n" + - "┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨\n" + - "┃ l.combine_first(r) │ x y z │ │ │ Adds missing rows and ┃\n" + - "┃ │ a 1 2 . │ │ │ columns. Also updates ┃\n" + - "┃ │ b 3 4 5 │ │ │ items that contain NaN. ┃\n" + - "┃ │ c . 6 7 │ │ │ Argument r must be a DF. ┃\n" + "┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛\n"; const DIAGRAM_16_A = - '| df.apply(…) | x 4 | x y | x 4 |'; + '| l.apply(…) | x 4 | x y | x 4 |'; const DIAGRAM_16_B = "┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'sum' │ ['sum'] │ {'x': 'sum'} ┃\n" + "┠─────────────────┼─────────────┼─────────────┼───────────────┨\n" + - "┃ df.apply(…) │ x 4 │ x y │ x 4 ┃\n" + - "┃ df.agg(…) │ y 6 │ sum 4 6 │ ┃\n" + + "┃ l.apply(…) │ x 4 │ x y │ x 4 ┃\n" + + "┃ l.agg(…) │ y 6 │ sum 4 6 │ ┃\n" + "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n" + "\n" + "┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃\n" + "┠─────────────────┼─────────────┼─────────────┼───────────────┨\n" + - "┃ df.apply(…) │ │ x y │ ┃\n" + - "┃ df.agg(…) │ x y │ rank rank │ x ┃\n" + - "┃ df.transform(…) │ a 1 1 │ a 1 1 │ a 1 ┃\n" + - "┃ │ b 2 2 │ b 2 2 │ b 2 ┃\n" + + "┃ l.apply(…) │ │ x y │ ┃\n" + + "┃ l.agg(…) │ x y │ rank rank │ x ┃\n" + + "┃ l.transform(…) │ a 1.0 1.0 │ a 1.0 1.0 │ a 1.0 ┃\n" + + "┃ │ b 2.0 2.0 │ b 2.0 2.0 │ b 2.0 ┃\n" + "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n"; const DIAGRAM_17_A =