From a3278d23d6e0cdcb7dd1ec4dd2e1df11329122d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Sat, 5 Feb 2022 20:18:50 +0100 Subject: [PATCH] Pandas --- README.md | 22 +++++++++++----------- index.html | 28 +++++++++++++++------------- parse.js | 18 ++++-------------- pdf/index_for_pdf.html | 14 +++++++------- pdf/index_for_pdf_print.html | 2 +- 5 files changed, 38 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index 19daa0a..c056a15 100644 --- a/README.md +++ b/README.md @@ -3140,7 +3140,7 @@ Name: a, dtype: int64 .update() # Updates items that are already present. ``` -#### Aggregate, Transform, Map: +#### Series — Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) = .rank/diff/cumsum/ffill/interpl() # Or: .agg/transform(lambda : ) @@ -3161,9 +3161,7 @@ y 2 | sr.apply(…) | 3 | sum 3 | s 3 | | sr.agg(…) | | | | +-----------------+-------------+-------------+---------------+ -``` -```text +-----------------+-------------+-------------+---------------+ | | 'rank' | ['rank'] | {'r': 'rank'} | +-----------------+-------------+-------------+---------------+ @@ -3214,7 +3212,7 @@ b 3 4 = .sort_values(column_key/s) # Sorts rows by the passed column/s. ``` -#### Merge, Join, Concat: +#### DataFrame — Merge, Join, Concat: ```python >>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']) x y @@ -3258,13 +3256,13 @@ c 6 7 +------------------------+---------------+------------+------------+--------------------------+ ``` -#### Aggregate, Transform, Map: +#### DataFrame — Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .apply/agg(lambda : ) = .rank/diff/cumsum/ffill/interpl() # Or: .apply/agg/transform(lambda : ) = .fillna() # Or: .applymap(lambda : ) ``` -* **All operations operate on columns by default. Use `'axis=1'` parameter to process the rows instead. Transform passes DF to a function if it raises an error after receiving a Sr.** +* **All operations operate on columns by default. Pass `'axis=1'` to process the rows instead.** ```python >>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y']) @@ -3281,9 +3279,7 @@ b 3 4 | df.agg(…) | x 4 | sum 4 6 | x 4 | | | y 6 | | | +-----------------+-------------+-------------+---------------+ -``` -```text +-----------------+-------------+-------------+---------------+ | | 'rank' | ['rank'] | {'x': 'rank'} | +-----------------+-------------+-------------+---------------+ @@ -3295,7 +3291,7 @@ b 3 4 ``` * **Use `'[col_key_1, col_key_2][row_key]'` to get the fifth result's values.** -#### Encode, Decode: +#### DataFrame — Encode, Decode, Plot: ```python = pd.read_json/html('') = pd.read_csv/pickle/excel('') @@ -3310,6 +3306,11 @@ b 3 4 .to_sql('', ) ``` +```python +import matplotlib.pyplot as plt +.plot.line/bar/hist/scatter([x=column_key, y=column_key/s]); plt.show() +``` + ### GroupBy **Object that groups together rows of a dataframe based on the value of the passed column.** @@ -3323,12 +3324,11 @@ c 7 8 ```python = .groupby(column_key/s) # DF is split into groups based on passed column. - = .get_group(group_key/s) # Selects a group by value of grouping column. = .apply() # Maps each group. Func can return DF, Sr or el. = [column_key] # A single column GB. All operations return a Sr. ``` -#### Aggregate, Transform, Map: +#### GroupBy — Aggregate, Transform, Map: ```python = .sum/max/mean/idxmax/all() # Or: .agg(lambda : ) = .rank/diff/cumsum/ffill() # Or: .transform(lambda : ) diff --git a/index.html b/index.html index 1907d86..911cec4 100644 --- a/index.html +++ b/index.html @@ -54,7 +54,7 @@
- +
@@ -2549,7 +2549,7 @@ Name: a, dtype: int64 <Sr> = <Sr>.combine_first(<Sr>) # Adds items that are not yet present. <Sr>.update(<Sr>) # Updates items that are already present. -

Aggregate, Transform, Map:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.agg(lambda <Sr>: <el>)
+

Series — Aggregate, Transform, Map:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.agg(lambda <Sr>: <el>)
 <Sr> = <Sr>.rank/diff/cumsum/ffill/interpl()  # Or: <Sr>.agg/transform(lambda <Sr>: <Sr>)
 <Sr> = <Sr>.fillna(<el>)                      # Or: <Sr>.agg/transform/map(lambda <el>: <el>)
 
@@ -2567,8 +2567,8 @@ y 2 ┃ sr.apply(…) │ 3 │ sum 3 │ s 3 ┃ ┃ sr.agg(…) │ │ │ ┃ ┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛ -
-
┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+
+┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
 ┃                 │    'rank'   │   ['rank']  │ {'r': 'rank'} ┃
 ┠─────────────────┼─────────────┼─────────────┼───────────────┨
 ┃ sr.apply(…)     │             │      rank   │               ┃
@@ -2606,7 +2606,7 @@ b  3  4
 <DF>    = <DF>.sort_index(ascending=True)     # Sorts rows by row keys.
 <DF>    = <DF>.sort_values(column_key/s)      # Sorts rows by the passed column/s.
 
-

Merge, Join, Concat:

>>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
+

DataFrame — Merge, Join, Concat:

>>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    x  y
 a  1  2
 b  3  4
@@ -2646,13 +2646,13 @@ c  6  7
 ┃                        │ c  .   6   7  │            │            │ R must be a DataFrame.   ┃
 ┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛
 
-

Aggregate, Transform, Map:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg(lambda <Sr>: <el>)
+

DataFrame — Aggregate, Transform, Map:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg(lambda <Sr>: <el>)
 <DF> = <DF>.rank/diff/cumsum/ffill/interpl()  # Or: <DF>.apply/agg/transform(lambda <Sr>: <Sr>)
 <DF> = <DF>.fillna(<el>)                      # Or: <DF>.applymap(lambda <el>: <el>)
 
    -
  • All operations operate on columns by default. Use 'axis=1' parameter to process the rows instead. Transform passes DF to a function if it raises an error after receiving a Sr.
  • +
  • All operations operate on columns by default. Pass 'axis=1' to process the rows instead.
>>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])
    x  y
@@ -2666,8 +2666,8 @@ b  3  4
 ┃ df.agg(…)       │     x  4    │  sum  4  6  │     x  4      ┃
 ┃                 │     y  6    │             │               ┃
 ┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
-
-
┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
+
+┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
 ┃                 │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃
 ┠─────────────────┼─────────────┼─────────────┼───────────────┨
 ┃ df.apply(…)     │      x  y   │      x    y │        x      ┃
@@ -2679,7 +2679,7 @@ b  3  4
 
  • Use '<DF>[col_key_1, col_key_2][row_key]' to get the fifth result's values.
-

Encode, Decode:

<DF> = pd.read_json/html('<str/path/url>')
+

DataFrame — Encode, Decode, Plot:

<DF> = pd.read_json/html('<str/path/url>')
 <DF> = pd.read_csv/pickle/excel('<path/url>')
 <DF> = pd.read_sql('<table_name/query>', <connection>)
 <DF> = pd.read_clipboard()
@@ -2690,6 +2690,9 @@ b  3  4
 <DF>.to_pickle/excel(<path>)
 <DF>.to_sql('<table_name>', <connection>)
 
+
import matplotlib.pyplot as plt
+<DF>.plot.line/bar/hist/scatter([x=column_key, y=column_key/s]); plt.show()
+

GroupBy

Object that groups together rows of a dataframe based on the value of the passed column.

>>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz'))
 >>> df.groupby('z').get_group(6)
    x  y
@@ -2699,11 +2702,10 @@ c  7  8
 
 
 
<GB> = <DF>.groupby(column_key/s)             # DF is split into groups based on passed column.
-<DF> = <GB>.get_group(group_key/s)            # Selects a group by value of grouping column.
 <DF> = <GB>.apply(<func>)                     # Maps each group. Func can return DF, Sr or el.
 <GB> = <GB>[column_key]                       # A single column GB. All operations return a Sr.
 
-

Aggregate, Transform, Map:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.agg(lambda <Sr>: <el>)
+

GroupBy — Aggregate, Transform, Map:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.agg(lambda <Sr>: <el>)
 <DF> = <GB>.rank/diff/cumsum/ffill()          # Or: <GB>.transform(lambda <Sr>: <Sr>)
 <DF> = <GB>.fillna(<el>)                      # Or: <GB>.transform(lambda <Sr>: <Sr>)
 
@@ -2881,7 +2883,7 @@ $ pyinstaller script.py --add-data '<path>:.'
- +
diff --git a/parse.js b/parse.js index 13b06bf..7c2443a 100755 --- a/parse.js +++ b/parse.js @@ -316,12 +316,8 @@ const DIAGRAM_13_B = "┠─────────────────┼─────────────┼─────────────┼───────────────┨\n" + "┃ sr.apply(…) │ 3 │ sum 3 │ s 3 ┃\n" + "┃ sr.agg(…) │ │ │ ┃\n" + - "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n"; - -const DIAGRAM_14_A = - '| sr.apply(…) | | rank | |'; - -const DIAGRAM_14_B = + "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n" + + "\n" + "┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'rank' │ ['rank'] │ {'r': 'rank'} ┃\n" + "┠─────────────────┼─────────────┼─────────────┼───────────────┨\n" + @@ -374,12 +370,8 @@ const DIAGRAM_16_B = "┃ df.apply(…) │ │ x y │ ┃\n" + "┃ df.agg(…) │ x 4 │ sum 4 6 │ x 4 ┃\n" + "┃ │ y 6 │ │ ┃\n" + - "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n"; - -const DIAGRAM_17_A = - '| df.apply(…) | x y | x y | x |'; - -const DIAGRAM_17_B = + "┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛\n" + + "\n" + "┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓\n" + "┃ │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃\n" + "┠─────────────────┼─────────────┼─────────────┼───────────────┨\n" + @@ -524,10 +516,8 @@ function updateDiagrams() { $(`code:contains(${DIAGRAM_11_A})`).html(DIAGRAM_11_B); $(`code:contains(${DIAGRAM_12_A})`).html(DIAGRAM_12_B).removeClass("text").removeClass("language-text").addClass("python"); $(`code:contains(${DIAGRAM_13_A})`).html(DIAGRAM_13_B).removeClass("text").removeClass("language-text").addClass("python"); - $(`code:contains(${DIAGRAM_14_A})`).html(DIAGRAM_14_B).removeClass("text").removeClass("language-text").addClass("python"); $(`code:contains(${DIAGRAM_15_A})`).html(DIAGRAM_15_B).removeClass("text").removeClass("language-text").addClass("python"); $(`code:contains(${DIAGRAM_16_A})`).html(DIAGRAM_16_B).removeClass("text").removeClass("language-text").addClass("python"); - $(`code:contains(${DIAGRAM_17_A})`).html(DIAGRAM_17_B).removeClass("text").removeClass("language-text").addClass("python"); $(`code:contains(${DIAGRAM_18_A})`).html(DIAGRAM_18_B).removeClass("text").removeClass("language-text").addClass("python"); } diff --git a/pdf/index_for_pdf.html b/pdf/index_for_pdf.html index 365b8c1..92976e2 100644 --- a/pdf/index_for_pdf.html +++ b/pdf/index_for_pdf.html @@ -32,7 +32,7 @@ copy function, 15
coroutine, 33
counter, 2, 4, 12, 17
-csv, 26, 34, 46, 47
+csv, 26, 34, 46, 47
curses module, 33, 34
cython, 49

D

@@ -48,7 +48,7 @@ eval function, 33
exceptions, 20-21, 23, 35

F

-

files, 22-29, 34, 46
+

files, 22-29, 34, 46
filter function, 11
floats, 4, 6, 7
format, 6-7, 37
@@ -73,7 +73,7 @@ iterator, 3-4, 11, 17
itertools module, 3, 8

J

-

json, 25, 36, 46

+

json, 25, 36, 46

L

lambda, 11
list comprehension, 11
@@ -102,7 +102,7 @@ paths, 23-24, 34
pickle module, 25
pillow library, 39-40
-plotting, 34, 47-48
+plotting, 34, 46, 47-48
print function, 22
profiling, 36-37
progress bar, 34
@@ -118,7 +118,7 @@ regular expressions, 5-6
requests library, 35, 36

S

-

scraping, 35, 43, 46, 47-48
+

scraping, 35, 43, 46, 47-48
sequence, 4, 18-19
sets, 2, 4, 11, 19, 21, 31
shell commands, 25
@@ -126,7 +126,7 @@ slots attribute, 15
sortable, 1, 16
splat operator, 10-11
-sql, 27, 46
+sql, 27, 46
statistics, 7, 37-38, 44-48
strings, 4-7, 14
struct module, 28-29
@@ -134,7 +134,7 @@ super function, 14
sys module, 13, 21-22

T

-

table, 26, 27, 34, 37-38, 45-46
+

table, 26, 27, 34, 37-38, 45-46
template, 6, 36
threading module, 30
time module, 34, 36
diff --git a/pdf/index_for_pdf_print.html b/pdf/index_for_pdf_print.html index 3aa2bfe..f9964fe 100644 --- a/pdf/index_for_pdf_print.html +++ b/pdf/index_for_pdf_print.html @@ -102,7 +102,7 @@ paths, 23-24, 34
pickle module, 25
pillow library, 39-40
-plotting, 34, 47-48
+plotting, 34, 46, 47-48
print function, 22
profiling, 36-37
progress bar, 34