From 3d7612c74c749cb23dc5cd6ba9e35f780a34e0e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Thu, 25 Jun 2020 17:00:57 +0200 Subject: [PATCH] Working on Pandas --- README.md | 58 ++++++++++++++++++---------------------------------- index.html | 60 ++++++++++++++++++++---------------------------------- 2 files changed, 42 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 99d4513..234aa31 100644 --- a/README.md +++ b/README.md @@ -3114,7 +3114,7 @@ Name: a, dtype: int64 #### Apply, Aggregate, Transform: ```python = .sum/max/mean/idxmax/all() # Or: .aggregate() - = .diff/cumsum/rank/pct_change() # Or: .agg/transform() + = .rank/diff/cumsum/pct_change() # Or: .agg/transform() = .fillna() # Or: .apply/agg/transform/map() ``` * **Also: `'ffill()'` and `'interpolate()'`.** @@ -3234,7 +3234,7 @@ c 6 7 #### Apply, Aggregate, Transform: ```python = .sum/max/mean/idxmax/all() # Or: .apply/agg/transform() - = .diff/cumsum/rank/pct_change() # Or: .apply/agg/transform() + = .rank/diff/cumsum/pct_change() # Or: .apply/agg/transform() = .fillna() # Or: .applymap() ``` * **Also: `'ffill()'` and `'interpolate()'`.** @@ -3268,7 +3268,7 @@ b 3 4 +-------------+---------------+---------------+---------------+ ``` -#### Encode: +#### Encode, Decode: ```python = pd.read_json/html('') = pd.read_csv/pickle/excel('') @@ -3276,7 +3276,6 @@ b 3 4 = pd.read_clipboard() ``` -#### Decode: ```python = .to_dict(['d/l/s/sp/r/i']) = .to_json/html/csv/markdown/latex([]) @@ -3295,7 +3294,7 @@ b 3 4 #### Apply, Aggregate, Transform: ```python = .sum/max/mean/idxmax/all() # Or: .apply/agg() - = .diff/cumsum/rank/ffill() # Or: .aggregate() + = .rank/diff/cumsum/ffill() # Or: .aggregate() = .fillna() # Or: .transform() ``` @@ -3309,37 +3308,20 @@ b 3 4 ``` ```python -+-------------+-------------+-------------+---------------+ -| | 'sum' | ['sum'] | {'x': 'sum'} | -+-------------+-------------+-------------+---------------+ -| gb.agg(…) | x y | x y | x | -| | z | 
sum sum | z | -| | 3 1 2 | z | 3 1 | -| | 6 11 13 | 3 1 2 | 6 11 | -| | | 6 11 13 | | -+-------------+-------------+-------------+---------------+ -| gb.trans(…) | x y | | | -| | a 1 2 | | | -| | b 11 13 | | | -| | c 11 13 | | | -+-------------+-------------+-------------+---------------+ -``` - -```python -+-------------+-------------+-------------+---------------+ -| | 'rank' | ['rank'] | {'x': 'rank'} | -+-------------+-------------+-------------+---------------+ -| gb.agg(…) | x y | x y | x | -| | a 1 1 | rank rank | a 1 | -| | b 1 1 | a 1 1 | b 1 | -| | c 2 2 | b 1 1 | c 2 | -| | | c 2 2 | | -+-------------+-------------+-------------+---------------+ -| gb.trans(…) | x y | | | -| | a 1 1 | | | -| | b 1 1 | | | -| | c 1 1 | | | -+-------------+-------------+-------------+---------------+ ++-------------+-------------+-------------+-------------+---------------+ +| | 'sum' | 'rank' | ['rank'] | {'x': 'rank'} | ++-------------+-------------+-------------+-------------+---------------+ +| gb.agg(…) | x y | x y | x y | x | +| | z | a 1 1 | rank rank | a 1 | +| | 3 1 2 | b 1 1 | a 1 1 | b 1 | +| | 6 11 13 | c 2 2 | b 1 1 | c 2 | +| | | | c 2 2 | | ++-------------+-------------+-------------+-------------+---------------+ +| gb.trans(…) | x y | x y | | | +| | a 1 2 | a 1 1 | | | +| | b 11 13 | b 1 1 | | | +| | c 11 13 | c 1 1 | | | ++-------------+-------------+-------------+-------------+---------------+ ``` ### Rolling @@ -3427,7 +3409,7 @@ import pandas, datetime import plotly.graph_objects as go def main(): - display_data(mangle_data(*scrape_data())) + display_data(wrangle_data(*scrape_data())) def scrape_data(): def scrape_yah(id_): @@ -3442,7 +3424,7 @@ def scrape_data(): dow_jones.name, gold.name, bitcoin.name = 'Dow Jones', 'Gold', 'Bitcoin' return covid, dow_jones, gold, bitcoin -def mangle_data(covid, dow_jones, gold, bitcoin): +def wrangle_data(covid, dow_jones, gold, bitcoin): out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=1) out = 
out.loc['2020-02-23':].iloc[:-2] out = out.interpolate() diff --git a/index.html b/index.html index 1b5ff38..2074c7e 100644 --- a/index.html +++ b/index.html @@ -2640,7 +2640,7 @@ Name: a, dtype: int64 <Sr>.update(<Sr>) # Updates items that are already present.

Apply, Aggregate, Transform:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.aggregate(<agg_func>)
-<Sr> = <Sr>.diff/cumsum/rank/pct_change()     # Or: <Sr>.agg/transform(<trans_func>)
+<Sr> = <Sr>.rank/diff/cumsum/pct_change()     # Or: <Sr>.agg/transform(<trans_func>)
 <Sr> = <Sr>.fillna(<el>)                      # Or: <Sr>.apply/agg/transform/map(<map_func>)
 
@@ -2737,7 +2737,7 @@ c 6 7 ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛

Apply, Aggregate, Transform:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg/transform(<agg_func>)
-<DF> = <DF>.diff/cumsum/rank/pct_change()     # Or: <DF>.apply/agg/transform(<trans_func>)
+<DF> = <DF>.rank/diff/cumsum/pct_change()     # Or: <DF>.apply/agg/transform(<trans_func>)
 <DF> = <DF>.fillna(<el>)                      # Or: <DF>.applymap(<map_func>)
 
@@ -2767,25 +2767,24 @@ b 3 4 | | | b 2 2 | | +-------------+---------------+---------------+---------------+ -

Encode:

<DF> = pd.read_json/html('<str/path/url>')
+

Encode, Decode:

<DF> = pd.read_json/html('<str/path/url>')
 <DF> = pd.read_csv/pickle/excel('<path/url>')
 <DF> = pd.read_sql('<query>', <connection>)
 <DF> = pd.read_clipboard()
 
-

Decode:

<dict> = <DF>.to_dict(['d/l/s/sp/r/i'])
+
<dict> = <DF>.to_dict(['d/l/s/sp/r/i'])
 <str>  = <DF>.to_json/html/csv/markdown/latex([<path>])
 <DF>.to_pickle/excel(<path>)
 <DF>.to_sql('<table_name>', <connection>)
-
- +

GroupBy

Object that groups together rows of a dataframe based on the value of the passed column.

<GB> = <DF>.groupby(column_key/s)             # DF is split into groups based on passed column.
 <DF> = <GB>.get_group(group_key)              # Selects a group by value of grouping column.
 

Apply, Aggregate, Transform:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.apply/agg(<agg_func>)
-<DF> = <GB>.diff/cumsum/rank/ffill()          # Or: <GB>.aggregate(<trans_func>)  
+<DF> = <GB>.rank/diff/cumsum/ffill()          # Or: <GB>.aggregate(<trans_func>)  
 <DF> = <GB>.fillna(<el>)                      # Or: <GB>.transform(<map_func>)
 
@@ -2796,35 +2795,20 @@ b 3 4 6: b 4 5 6 c 7 8 6
-
+-------------+-------------+-------------+---------------+
-|             |    'sum'    |   ['sum']   | {'x': 'sum'}  |
-+-------------+-------------+-------------+---------------+
-| gb.agg(…)   |      x   y  |      x   y  |         x     |
-|             |  z          |    sum sum  |     z         |
-|             |  3   1   2  |  z          |     3   1     |
-|             |  6  11  13  |  3   1   2  |     6  11     |
-|             |             |  6  11  13  |               |
-+-------------+-------------+-------------+---------------+
-| gb.trans(…) |      x   y  |             |               |
-|             |  a   1   2  |             |               |
-|             |  b  11  13  |             |               |
-|             |  c  11  13  |             |               |
-+-------------+-------------+-------------+---------------+
-
-
+-------------+-------------+-------------+---------------+
-|             |   'rank'    |  ['rank']   | {'x': 'rank'} |
-+-------------+-------------+-------------+---------------+
-| gb.agg(…)   |      x  y   |      x    y |        x      |
-|             |   a  1  1   |   rank rank |     a  1      |
-|             |   b  1  1   | a    1    1 |     b  1      |
-|             |   c  2  2   | b    1    1 |     c  2      |
-|             |             | c    2    2 |               |
-+-------------+-------------+-------------+---------------+
-| gb.trans(…) |      x  y   |             |               |
-|             |   a  1  1   |             |               |
-|             |   b  1  1   |             |               |
-|             |   c  1  1   |             |               |
-+-------------+-------------+-------------+---------------+
+
+-------------+-------------+-------------+-------------+---------------+
+|             |    'sum'    |   'rank'    |  ['rank']   | {'x': 'rank'} |
++-------------+-------------+-------------+-------------+---------------+
+| gb.agg(…)   |      x   y  |      x  y   |      x    y |        x      |
+|             |  z          |   a  1  1   |   rank rank |     a  1      |
+|             |  3   1   2  |   b  1  1   | a    1    1 |     b  1      |
+|             |  6  11  13  |   c  2  2   | b    1    1 |     c  2      |
+|             |             |             | c    2    2 |               |
++-------------+-------------+-------------+-------------+---------------+
+| gb.trans(…) |      x   y  |      x  y   |             |               |
+|             |  a   1   2  |   a  1  1   |             |               |
+|             |  b  11  13  |   b  1  1   |             |               |
+|             |  c  11  13  |   c  1  1   |             |               |
++-------------+-------------+-------------+-------------+---------------+
 

Rolling

<Rl_S/D/G> = <Sr/DF/GB>.rolling(window_size)  # Also: `min_periods=None, center=False`.
 <Rl_S/D>   = <Rl_D/G>[column_key/s]           # Or: <Rl>.column_key
@@ -2899,7 +2883,7 @@ plotly.express.line(summed, x='Date', y=import plotly.graph_objects as go
 
 def main():
-    display_data(mangle_data(*scrape_data()))
+    display_data(wrangle_data(*scrape_data()))
 
 def scrape_data():
     def scrape_yah(id_):
@@ -2914,7 +2898,7 @@ plotly.express.line(summed, x='Date', y='Dow Jones', 'Gold', 'Bitcoin'
     return covid, dow_jones, gold, bitcoin
 
-def mangle_data(covid, dow_jones, gold, bitcoin):
+def wrangle_data(covid, dow_jones, gold, bitcoin):
     out = pandas.concat([covid, dow_jones, gold, bitcoin], axis=1)
     out = out.loc['2020-02-23':].iloc[:-2]
     out = out.interpolate()