From 00bb34baad45e715c902595ccf1457f9ce40b689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Thu, 25 Jun 2020 03:24:19 +0200 Subject: [PATCH] Working on Pandas --- README.md | 39 ++++++++++++++++++++------------------- index.html | 41 ++++++++++++++++++++++------------------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 940a77b..614cced 100644 --- a/README.md +++ b/README.md @@ -3076,7 +3076,7 @@ from pandas import Series, DataFrame **Ordered dictionary with a name.** ```python ->>> sr = Series([1, 2], index=['x', 'y'], name='a') +>>> Series([1, 2], index=['x', 'y'], name='a') x 1 y 2 Name: a, dtype: int64 @@ -3107,15 +3107,24 @@ Name: a, dtype: int64 ```python = .append() # Or: pd.concat() - = .combine_first() # Adds items that are not yet present (extends). -combine, update? + = .combine_first() # Adds items that are not yet present. +.update() # Updates items that are already present. ``` -#### Aggregations: ```python - = .sum/max/mean/idxmax/all() - = () - = .apply/agg() # Apply can only accept strings. + = .sum/max/mean/idxmax/all() # Or: .aggregate() + = .diff/cumsum/rank/pct_change() # Or: .agg/transform() + = .fillna() # Or: .apply/agg/transform/map() +``` +* **Also: `'ffill()'` and `'interpolate()'`.** +* **The way `'aggregate()'` and `'transform()'` find out whether a function accepts an element or the whole Series is by passing it a single value at first and if it raises an error, then they pass it the whole Series.** + +#### Apply, Aggregate, Transform: +```python +>>> sr = Series([1, 2], index=['x', 'y'], name='a') +x 1 +y 2 +Name: a, dtype: int64 ``` ```python @@ -3128,13 +3137,6 @@ combine, update? +-------------+--------+-----------+---------------+ ``` -#### Transformations: -```python - = .diff/cumsum/rank/pct_change() # …/fillna/ffill/interpolate() - = .apply/agg/transform() -map? 
-``` - ```python +-------------+--------+-----------+---------------+ | | 'rank' | ['rank'] | {'r': 'rank'} | @@ -3186,12 +3188,11 @@ b 3 4 ``` ```python - = .sum/max/mean/idxmax/all() - = .apply/agg/transform() - = .diff/cumsum/rank() # …/pct_change/fillna/ffill/interpolate() - = .apply/agg/transform() - = .applymap() # Apply a function to a Dataframe elementwise. + = .sum/max/mean/idxmax/all() # Or: .apply/agg/transform() + = .diff/cumsum/rank/pct_change() # Or: .apply/agg/transform() + = .fillna() # Or: .applymap() ``` +* **Also: `'ffill()'` and `'interpolate()'`.** * **All operations operate on columns by default. Use `'axis=1'` parameter to process the rows instead.** #### Apply, Aggregate, Transform: diff --git a/index.html b/index.html index 69e6ccd..0cf2f48 100644 --- a/index.html +++ b/index.html @@ -2613,7 +2613,7 @@ SIZE, MAX_SPEED = 50, P(from pandas import Series, DataFrame -

Series

Ordered dictionary with a name.

>>> sr = Series([1, 2], index=['x', 'y'], name='a')
+

Series

Ordered dictionary with a name.

>>> Series([1, 2], index=['x', 'y'], name='a')
 x    1
 y    2
 Name: a, dtype: int64
@@ -2636,12 +2636,21 @@ Name: a, dtype: int64
 <Sr> = <Sr> +-*/ <el/Sr>                      # Non-matching keys get value NaN.
 
<Sr> = <Sr>.append(<Sr>)                      # Or: pd.concat(<coll_of_Sr>)
-<Sr> = <Sr>.combine_first(<Sr>)               # Adds items that are not yet present (extends).
-combine, update?
+<Sr> = <Sr>.combine_first(<Sr>)               # Adds items that are not yet present.
+<Sr>.update(<Sr>)                             # Updates items that are already present.
 
-

Aggregations:

<el> = <Sr>.sum/max/mean/idxmax/all()
-<el> = <agg_func>(<Sr>)
-<el> = <Sr>.apply/agg(<agg_func>)             # Apply can only accept strings.
+
<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.aggregate(<agg_func>)
+<Sr> = <Sr>.diff/cumsum/rank/pct_change()     # Or: <Sr>.agg/transform(<trans_func>)
+<Sr> = <Sr>.fillna(<el>)                      # Or: <Sr>.apply/agg/transform/map(<map_func>)
+
+
    +
  • Also: 'ffill()' and 'interpolate()'.
  • +
  • 'aggregate()' and 'transform()' determine whether a function accepts an element or the whole Series by first passing it a single value; if that raises an error, they pass it the whole Series.
  • +
+

Apply, Aggregate, Transform:

>>> sr = Series([1, 2], index=['x', 'y'], name='a')
+x    1
+y    2
+Name: a, dtype: int64
 
+-------------+--------+-----------+---------------+
@@ -2652,11 +2661,6 @@ combine, update?
 |             |        |           |               |
 +-------------+--------+-----------+---------------+
 
-

Transformations:

<Sr> = <Sr>.diff/cumsum/rank/pct_change()     # …/fillna/ffill/interpolate()
-<Sr> = <Sr>.apply/agg/transform(<trans_func>)
-map?
-
-
+-------------+--------+-----------+---------------+
 |             | 'rank' | ['rank']  | {'r': 'rank'} |
 +-------------+--------+-----------+---------------+
@@ -2692,16 +2696,15 @@ b  3  4
 <DF>    = <DF>.transpose()                    # Rotates the table.
 <DF>    = <DF>.melt(id_vars=column_key/s)     # Melts on columns.
 
-
<Sr>    = <DF>.sum/max/mean/idxmax/all()
-<Sr>    = <DF>.apply/agg/transform(<agg_func>)
-<DF>    = <DF>.diff/cumsum/rank()             # …/pct_change/fillna/ffill/interpolate()
-<DF>    = <DF>.apply/agg/transform(<trans_func>)
-<DF>    = <DF>.applymap(<func>)               # Apply a function to a Dataframe elementwise.
+
<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg/transform(<agg_func>)
+<DF> = <DF>.diff/cumsum/rank/pct_change()     # Or: <DF>.apply/agg/transform(<trans_func>)
+<DF> = <DF>.fillna(<el>)                      # Or: <DF>.applymap(<map_func>)
 
    +
  • Also: 'ffill()' and 'interpolate()'.
  • All operations operate on columns by default. Use 'axis=1' parameter to process the rows instead.
-

Apply, Aggregate, Transform:

>>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
+

Apply, Aggregate, Transform:

>>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    x  y
 a  1  2
 b  3  4
@@ -2783,7 +2786,7 @@ c  6  7
 
  • Result of an operation is a dataframe with index made up of group keys. Use '<DF>.reset_index()' to move the index back into its own column.
-

Aggregations:

<DF> = <GB>.sum/max/mean/idxmax/all()
+

Aggregations:

<DF> = <GB>.sum/max/mean/idxmax/all()
 <DF> = <GB>.apply/agg/transform(<agg_func>)
 
@@ -2807,7 +2810,7 @@ c 6 7 | | c 11 13 | | | +-------------+------------+-------------+---------------+
-

Transformations:

<DF> = <GB>.diff/cumsum/rank()                # …/pct_change/fillna/ffill()
+

Transformations:

<DF> = <GB>.diff/cumsum/rank()                # …/pct_change/fillna/ffill()
 <DF> = <GB>.agg/transform(<trans_func>)