From cc94902871c5365123f61d3deddeb4a88ef62f78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Thu, 25 Jun 2020 22:57:20 +0200 Subject: [PATCH] Working on Pandas --- README.md | 101 +++++++++++++++++++++++++-------------------------- index.html | 105 ++++++++++++++++++++++++++--------------------------- 2 files changed, 100 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 999b99e..6757111 100644 --- a/README.md +++ b/README.md @@ -3111,13 +3111,12 @@ Name: a, dtype: int64 .update() # Updates items that are already present. ``` -#### Apply, Aggregate, Transform: +#### Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .aggregate() - = .rank/diff/cumsum/pct_change() # Or: .agg/transform() + = .rank/diff/cumsum/ffill/interpl() # Or: .agg/transform() = .fillna() # Or: .apply/agg/transform/map() ``` -* **Also: `'ffill()'` and `'interpolate()'`.** * **The way `'aggregate()'` and `'transform()'` find out whether a function accepts an element or the whole Series is by passing it a single value at first and if it raises an error, then they pass it the whole Series.** ```python @@ -3128,23 +3127,22 @@ dtype: int64 ``` ```python -+-------------+---------------+---------------+---------------+ -| | 'sum' | ['sum'] | {'s': 'sum'} | -+-------------+---------------+---------------+---------------+ -| sr.apply(…) | | | | -| sr.agg(…) | 3 | sum 3 | s 3 | -| | | | | -+-------------+---------------+---------------+---------------+ +┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓ +┃ │ 'sum' │ ['sum'] │ {'s': 'sum'} ┃ +┠─────────────┼─────────────┼─────────────┼───────────────┨ +┃ sr.apply(…) │ 3 │ sum 3 │ s 3 ┃ +┃ sr.agg(…) │ │ │ ┃ +┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ ``` ```python -+-------------+---------------+---------------+---------------+ -| | 'rank' | ['rank'] | {'r': 'rank'} | -+-------------+---------------+---------------+---------------+ -| sr.apply(…) | | rank | | -| 
sr.agg(…) | x 1 | x 1 | r x 1 | -| sr.trans(…) | y 2 | y 2 | y 2 | -+-------------+---------------+---------------+---------------+ +┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓ +┃ │ 'rank' │ ['rank'] │ {'r': 'rank'} ┃ +┠─────────────┼─────────────┼─────────────┼───────────────┨ +┃ sr.apply(…) │ │ rank │ ┃ +┃ sr.agg(…) │ x 1 │ x 1 │ r x 1 ┃ +┃ sr.trans(…) │ y 2 │ y 2 │ y 2 ┃ +┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ ``` ### DataFrame @@ -3201,7 +3199,7 @@ c 6 7 ```python ┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━┓ -┃ how/join │ 'outer' │ 'inner' │ 'left' │ description ┃ +┃ how/join │ 'outer' │ 'inner' │ 'left' │ description ┃ ┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨ ┃ l.merge(r, on='y', │ x y z │ x y z │ x y z │ Joins/merges on column. ┃ ┃ how=…) │ 0 1 2 . │ 3 4 5 │ 1 2 . │ Also accepts left_on and ┃ @@ -3231,13 +3229,12 @@ c 6 7 ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛ ``` -#### Apply, Aggregate, Transform: +#### Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .apply/agg/transform() - = .rank/diff/cumsum/pct_change() # Or: .apply/agg/transform() + = .rank/diff/cumsum/ffill/interpl() # Or: .apply/agg/transform() = .fillna() # Or: .applymap() ``` -* **Also: `'ffill()'` and `'interpolate()'`.** * **All operations operate on columns by default. 
Use `'axis=1'` parameter to process the rows instead.** ```python @@ -3248,24 +3245,24 @@ b 3 4 ``` ```python -+-------------+---------------+---------------+---------------+ -| | 'sum' | ['sum'] | {'x': 'sum'} | -+-------------+---------------+---------------+---------------+ -| df.apply(…) | | x y | | -| df.agg(…) | x 4 | sum 4 6 | x 4 | -| | y 6 | | | -+-------------+---------------+---------------+---------------+ +┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓ +┃ │ 'sum' │ ['sum'] │ {'x': 'sum'} ┃ +┠─────────────┼─────────────┼─────────────┼───────────────┨ +┃ df.apply(…) │ │ x y │ ┃ +┃ df.agg(…) │ x 4 │ sum 4 6 │ x 4 ┃ +┃ │ y 6 │ │ ┃ +┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ ``` ```python -+-------------+---------------+---------------+---------------+ -| | 'rank' | ['rank'] | {'x': 'rank'} | -+-------------+---------------+---------------+---------------+ -| df.apply(…) | x y | x y | x | -| df.agg(…) | a 1 1 | rank rank | a 1 | -| df.trans(…) | b 2 2 | a 1 1 | b 2 | -| | | b 2 2 | | -+-------------+---------------+---------------+---------------+ +┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓ +┃ │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃ +┠─────────────┼─────────────┼─────────────┼───────────────┨ +┃ df.apply(…) │ x y │ x y │ x ┃ +┃ df.agg(…) │ a 1 1 │ rank rank │ a 1 ┃ +┃ df.trans(…) │ b 2 2 │ a 1 1 │ b 2 ┃ +┃ │ │ b 2 2 │ ┃ +┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ ``` #### Encode, Decode: @@ -3284,7 +3281,7 @@ b 3 4 ``` ### GroupBy -**Object that groups together rows of a dataframe based on the value of passed column.** +**Object that groups together rows of a dataframe based on the value of the passed column.** ```python >>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz')) @@ -3302,7 +3299,7 @@ c 7 8 = .get_group(group_key) # Selects a group by value of grouping column. 
``` -#### Apply, Aggregate, Transform: +#### Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .apply/agg() = .rank/diff/cumsum/ffill() # Or: .aggregate() @@ -3318,20 +3315,20 @@ c 7 8 ``` ```python -+-------------+-------------+-------------+-------------+---------------+ -| | 'sum' | 'rank' | ['rank'] | {'x': 'rank'} | -+-------------+-------------+-------------+-------------+---------------+ -| gb.agg(…) | x y | x y | x y | x | -| | z | a 1 1 | rank rank | a 1 | -| | 3 1 2 | b 1 1 | a 1 1 | b 1 | -| | 6 11 13 | c 2 2 | b 1 1 | c 2 | -| | | | c 2 2 | | -+-------------+-------------+-------------+-------------+---------------+ -| gb.trans(…) | x y | x y | | | -| | a 1 2 | a 1 1 | | | -| | b 11 13 | b 1 1 | | | -| | c 11 13 | c 1 1 | | | -+-------------+-------------+-------------+-------------+---------------+ +┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓ +┃ │ 'sum' │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃ +┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨ +┃ gb.agg(…) │ x y │ x y │ x y │ x ┃ +┃ │ z │ a 1 1 │ rank rank │ a 1 ┃ +┃ │ 3 1 2 │ b 1 1 │ a 1 1 │ b 1 ┃ +┃ │ 6 11 13 │ c 2 2 │ b 1 1 │ c 2 ┃ +┃ │ │ │ c 2 2 │ ┃ +┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨ +┃ gb.trans(…) │ x y │ x y │ │ ┃ +┃ │ a 1 2 │ a 1 1 │ │ ┃ +┃ │ b 11 13 │ b 1 1 │ │ ┃ +┃ │ c 11 13 │ c 1 1 │ │ ┃ +┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ ``` ### Rolling diff --git a/index.html b/index.html index 5cff2ce..7a8917d 100644 --- a/index.html +++ b/index.html @@ -2639,13 +2639,12 @@ Name: a, dtype: int64 <Sr> = <Sr>.combine_first(<Sr>) # Adds items that are not yet present. <Sr>.update(<Sr>) # Updates items that are already present. -

Apply, Aggregate, Transform:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.aggregate(<agg_func>)
-<Sr> = <Sr>.rank/diff/cumsum/pct_change()     # Or: <Sr>.agg/transform(<trans_func>)
+

Aggregate, Transform, Map:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.aggregate(<agg_func>)
+<Sr> = <Sr>.rank/diff/cumsum/ffill/interpl()  # Or: <Sr>.agg/transform(<trans_func>)
 <Sr> = <Sr>.fillna(<el>)                      # Or: <Sr>.apply/agg/transform/map(<map_func>)
 
    -
  • Also: 'ffill()' and 'interpolate()'.
  • The way 'aggregate()' and 'transform()' find out whether a function accepts an element or the whole Series is by passing it a single value at first and if it raises an error, then they pass it the whole Series.
>>> sr = Series([1, 2], index=['x', 'y'])
@@ -2653,21 +2652,20 @@ x    1
 y    2
 dtype: int64
 
-
+-------------+---------------+---------------+---------------+
-|             |     'sum'     |    ['sum']    |  {'s': 'sum'} |
-+-------------+---------------+---------------+---------------+
-| sr.apply(…) |               |               |               |
-| sr.agg(…)   |       3       |     sum 3     |      s  3     |
-|             |               |               |               |
-+-------------+---------------+---------------+---------------+
-
-
+-------------+---------------+---------------+---------------+
-|             |    'rank'     |   ['rank']    | {'r': 'rank'} |
-+-------------+---------------+---------------+---------------+
-| sr.apply(…) |               |       rank    |               |
-| sr.agg(…)   |     x  1      |    x     1    |    r  x  1    |
-| sr.trans(…) |     y  2      |    y     2    |       y  2    |
-+-------------+---------------+---------------+---------------+
+
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+┃             │    'sum'    │   ['sum']   │ {'s': 'sum'}  ┃
+┠─────────────┼─────────────┼─────────────┼───────────────┨
+┃ sr.apply(…) │      3      │    sum 3    │      s  3     ┃
+┃ sr.agg(…)   │             │             │               ┃
+┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
+
+
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+┃             │    'rank'   │   ['rank']  │ {'r': 'rank'} ┃
+┠─────────────┼─────────────┼─────────────┼───────────────┨
+┃ sr.apply(…) │             │      rank   │               ┃
+┃ sr.agg(…)   │    x  1     │   x     1   │    r  x  1    ┃
+┃ sr.trans(…) │    y  2     │   y     2   │       y  2    ┃
+┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
 

DataFrame

Table with labeled rows and columns.

>>> DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    x  y
@@ -2707,7 +2705,7 @@ c  6  7
 
┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃        how/join        │    'outer'    │  'inner'   │   'left'   │       description        ┃
+┃        how/join        │    'outer'    │   'inner'  │   'left'   │       description        ┃
 ┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
 ┃ l.merge(r, on='y',     │    x   y   z  │ x   y   z  │ x   y   z  │ Joins/merges on column.  ┃
 ┃            how=…)      │ 0  1   2   .  │ 3   4   5  │ 1   2   .  │ Also accepts left_on and ┃
@@ -2736,13 +2734,12 @@ c  6  7
 ┃                        │ c  .   6   7  │            │            │                          ┃
 ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛
 
-

Apply, Aggregate, Transform:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg/transform(<agg_func>)
-<DF> = <DF>.rank/diff/cumsum/pct_change()     # Or: <DF>.apply/agg/transform(<trans_func>)
+

Aggregate, Transform, Map:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg/transform(<agg_func>)
+<DF> = <DF>.rank/diff/cumsum/ffill/interpl()  # Or: <DF>.apply/agg/transform(<trans_func>)
 <DF> = <DF>.fillna(<el>)                      # Or: <DF>.applymap(<map_func>)
 
    -
  • Also: 'ffill()' and 'interpolate()'.
  • All operations operate on columns by default. Use 'axis=1' parameter to process the rows instead.
>>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
@@ -2750,22 +2747,22 @@ c  6  7
 a  1  2
 b  3  4
 
-
+-------------+---------------+---------------+---------------+
-|             |     'sum'     |    ['sum']    | {'x': 'sum'}  |
-+-------------+---------------+---------------+---------------+
-| df.apply(…) |               |        x y    |               |
-| df.agg(…)   |     x  4      |    sum 4 6    |     x  4      |
-|             |     y  6      |               |               |
-+-------------+---------------+---------------+---------------+
-
-
+-------------+---------------+---------------+---------------+
-|             |    'rank'     |   ['rank']    | {'x': 'rank'} |
-+-------------+---------------+---------------+---------------+
-| df.apply(…) |       x  y    |       x    y  |        x      |
-| df.agg(…)   |    a  1  1    |    rank rank  |     a  1      |
-| df.trans(…) |    b  2  2    |  a    1    1  |     b  2      |
-|             |               |  b    2    2  |               |
-+-------------+---------------+---------------+---------------+
+
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+┃             │    'sum'    │   ['sum']   │ {'x': 'sum'}  ┃
+┠─────────────┼─────────────┼─────────────┼───────────────┨
+┃ df.apply(…) │             │       x y   │               ┃
+┃ df.agg(…)   │    x  4     │   sum 4 6   │     x  4      ┃
+┃             │    y  6     │             │               ┃
+┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
+
+
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+┃             │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃
+┠─────────────┼─────────────┼─────────────┼───────────────┨
+┃ df.apply(…) │      x  y   │      x    y │        x      ┃
+┃ df.agg(…)   │   a  1  1   │   rank rank │     a  1      ┃
+┃ df.trans(…) │   b  2  2   │ a    1    1 │     b  2      ┃
+┃             │             │ b    2    2 │               ┃
+┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
 

Encode, Decode:

<DF> = pd.read_json/html('<str/path/url>')
 <DF> = pd.read_csv/pickle/excel('<path/url>')
@@ -2778,7 +2775,7 @@ b  3  4
 <DF>.to_pickle/excel(<path>)
 <DF>.to_sql('<table_name>', <connection>)
 
-

GroupBy

Object that groups together rows of a dataframe based on the value of passed column.

>>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz'))
+

GroupBy

Object that groups together rows of a dataframe based on the value of the passed column.

>>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz'))
 >>> df.groupby('z').get_group(3)
    x  y
 a  1  2
@@ -2792,7 +2789,7 @@ c  7  8
 
<GB> = <DF>.groupby(column_key/s)             # DF is split into groups based on passed column.
 <DF> = <GB>.get_group(group_key)              # Selects a group by value of grouping column.
 
-

Apply, Aggregate, Transform:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.apply/agg(<agg_func>)
+

Aggregate, Transform, Map:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.apply/agg(<agg_func>)
 <DF> = <GB>.rank/diff/cumsum/ffill()          # Or: <GB>.aggregate(<trans_func>)  
 <DF> = <GB>.fillna(<el>)                      # Or: <GB>.transform(<map_func>)
 
@@ -2803,20 +2800,20 @@ c 7 8 6: b 4 5 6 c 7 8 6
-
+-------------+-------------+-------------+-------------+---------------+
-|             |    'sum'    |   'rank'    |  ['rank']   | {'x': 'rank'} |
-+-------------+-------------+-------------+-------------+---------------+
-| gb.agg(…)   |      x   y  |      x  y   |      x    y |        x      |
-|             |  z          |   a  1  1   |   rank rank |     a  1      |
-|             |  3   1   2  |   b  1  1   | a    1    1 |     b  1      |
-|             |  6  11  13  |   c  2  2   | b    1    1 |     c  2      |
-|             |             |             | c    2    2 |               |
-+-------------+-------------+-------------+-------------+---------------+
-| gb.trans(…) |      x   y  |      x  y   |             |               |
-|             |  a   1   2  |   a  1  1   |             |               |
-|             |  b  11  13  |   b  1  1   |             |               |
-|             |  c  11  13  |   c  1  1   |             |               |
-+-------------+-------------+-------------+-------------+---------------+
+
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+┃             │    'sum'    │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃
+┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
+┃ gb.agg(…)   │      x   y  │      x  y   │      x    y │        x      ┃
+┃             │  z          │   a  1  1   │   rank rank │     a  1      ┃
+┃             │  3   1   2  │   b  1  1   │ a    1    1 │     b  1      ┃
+┃             │  6  11  13  │   c  2  2   │ b    1    1 │     c  2      ┃
+┃             │             │             │ c    2    2 │               ┃
+┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
+┃ gb.trans(…) │      x   y  │      x  y   │             │               ┃
+┃             │  a   1   2  │   a  1  1   │             │               ┃
+┃             │  b  11  13  │   b  1  1   │             │               ┃
+┃             │  c  11  13  │   c  1  1   │             │               ┃
+┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
 

Rolling

Object for rolling window calculations.

<R_Sr/R_DF/R_GB> = <Sr/DF/GB>.rolling(window_size)  # Also: `min_periods=None, center=False`.
 <R_Sr/R_DF>      = <R_DF/R_GB>[column_key/s]        # Or: <R>.column_key