From c41c5b2ad3d97db9a5a9708ff70d0eaa3939a0dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Wed, 11 Oct 2023 05:23:22 +0200 Subject: [PATCH] String and Regex update --- README.md | 34 ++++++++++++++++++---------------- index.html | 46 +++++++++++++++++++++------------------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index ba3d0bd..cf2ac92 100644 --- a/README.md +++ b/README.md @@ -300,9 +300,11 @@ True String ------ +**Immutable sequence of characters.** + ```python <str> = <str>.strip() # Strips all whitespace characters from both ends. -<str> = <str>.strip('<chars>') # Strips all passed characters from both ends. +<str> = <str>.strip('<chars>') # Strips passed characters. Also lstrip/rstrip(). ``` ```python @@ -321,6 +323,7 @@ String ``` ```python +<str> = <str>.lower() # Changes the case. Also upper/capitalize/title(). <str> = <str>.replace(old, new [, count]) # Replaces 'old' with 'new' at most 'count' times. <str> = <str>.translate(<table>) # Use `str.maketrans(<dict>)` to generate table. ``` @@ -329,38 +332,37 @@ String <str> = chr(<int>) # Converts int to Unicode character. <int> = ord(<str>) # Converts Unicode character to int. ``` -* **Also: `'lstrip()'`, `'rstrip()'` and `'rsplit()'`.** -* **Also: `'lower()'`, `'upper()'`, `'capitalize()'` and `'title()'`.** +* **Use `'unicodedata.normalize("NFC", <str>)'` on strings that may contain characters like `'Ö'` before comparing them, because they can be stored as one or two characters.** ### Property Methods -```text -+---------------+----------+----------+----------+----------+----------+ -| | [ !#$%…] | [a-zA-Z] | [¼½¾] | [²³¹] | [0-9] | -+---------------+----------+----------+----------+----------+----------+ -| isprintable() | yes | yes | yes | yes | yes | -| isalnum() | | yes | yes | yes | yes | -| isnumeric() | | | yes | yes | yes | -| isdigit() | | | | yes | yes | -| isdecimal() | | | | | yes | -+---------------+----------+----------+----------+----------+----------+ +```python +<bool> = <str>.isdecimal() # Checks for [0-9].
+<bool> = <str>.isdigit() # Checks for [²³¹] and isdecimal(). +<bool> = <str>.isnumeric() # Checks for [¼½¾] and isdigit(). +<bool> = <str>.isalnum() # Checks for [a-zA-Z] and isnumeric(). +<bool> = <str>.isprintable() # Checks for [ !#$%…] and isalnum(). +<bool> = <str>.isspace() # Checks for [ \t\n\r\f\v\x1c-\x1f\x85\xa0…]. ``` -* **`'isspace()'` checks for whitespaces: `'[ \t\n\r\f\v\x1c-\x1f\x85\xa0\u1680…]'`.** Regex ----- +**Functions for regular expression matching.** + ```python import re +``` + +```python <str> = re.sub(<regex>, new, text, count=0) # Substitutes all occurrences with 'new'. <list> = re.findall(<regex>, text) # Returns all occurrences as strings. <list> = re.split(<regex>, text, maxsplit=0) # Add brackets around regex to include matches. -<Match> = re.search(<regex>, text) # Searches for first occurrence of the pattern. +<Match> = re.search(<regex>, text) # First occurrence of the pattern or None. <Match> = re.match(<regex>, text) # Searches only at the beginning of the text. <iter> = re.finditer(<regex>, text) # Returns all occurrences as Match objects. ``` * **Argument 'new' can be a function that accepts a Match object and returns a string.** -* **Search() and match() return None if they can't find a match.** * **Argument `'flags=re.IGNORECASE'` can be used with all functions.** * **Argument `'flags=re.MULTILINE'` makes `'^'` and `'$'` match the start/end of each line.** * **Argument `'flags=re.DOTALL'` makes `'.'` also accept the `'\n'`.** diff --git a/index.html b/index.html index 998f95e..869da71 100644 --- a/index.html +++ b/index.html @@ -54,7 +54,7 @@
- +
@@ -290,10 +290,11 @@ Point(x=1, y=2 ┃ decimal.Decimal │ ✓ │ │ │ │ ┃ ┗━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┛ -

#String

<str>  = <str>.strip()                       # Strips all whitespace characters from both ends.
-<str>  = <str>.strip('<chars>')              # Strips all passed characters from both ends.
+

#String

Immutable sequence of characters.

<str>  = <str>.strip()                       # Strips all whitespace characters from both ends.
+<str>  = <str>.strip('<chars>')              # Strips passed characters. Also lstrip/rstrip().
 
+
<list> = <str>.split()                       # Splits on one or more whitespace characters.
 <list> = <str>.split(sep=None, maxsplit=-1)  # Splits on 'sep' str at most 'maxsplit' times.
 <list> = <str>.splitlines(keepends=False)    # On [\n\r\f\v\x1c-\x1e\x85\u2028\u2029] and \r\n.
@@ -305,42 +306,37 @@ Point(x=1, y=2
 <int>  = <str>.find(<sub_str>)               # Returns start index of the first match or -1.
 <int>  = <str>.index(<sub_str>)              # Same, but raises ValueError if missing.
 
-
<str>  = <str>.replace(old, new [, count])   # Replaces 'old' with 'new' at most 'count' times.
+
<str>  = <str>.lower()                       # Changes the case. Also upper/capitalize/title().
+<str>  = <str>.replace(old, new [, count])   # Replaces 'old' with 'new' at most 'count' times.
 <str>  = <str>.translate(<table>)            # Use `str.maketrans(<dict>)` to generate table.
 
<str>  = chr(<int>)                          # Converts int to Unicode character.
 <int>  = ord(<str>)                          # Converts Unicode character to int.
 
    -
  • Also: 'lstrip()', 'rstrip()' and 'rsplit()'.
  • -
  • Also: 'lower()', 'upper()', 'capitalize()' and 'title()'.
  • +
  • Use 'unicodedata.normalize("NFC", <str>)' on strings that may contain characters like 'Ö' before comparing them, because they can be stored as one or two characters.
-

Property Methods

┏━━━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┓
-┃               │ [ !#$%…] │ [a-zA-Z] │  [¼½¾]   │  [²³¹]   │  [0-9]   ┃
-┠───────────────┼──────────┼──────────┼──────────┼──────────┼──────────┨
-┃ isprintable() │    ✓     │    ✓     │    ✓     │    ✓     │    ✓     ┃
-┃ isalnum()     │          │    ✓     │    ✓     │    ✓     │    ✓     ┃
-┃ isnumeric()   │          │          │    ✓     │    ✓     │    ✓     ┃
-┃ isdigit()     │          │          │          │    ✓     │    ✓     ┃
-┃ isdecimal()   │          │          │          │          │    ✓     ┃
-┗━━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┛
+

Property Methods

<bool> = <str>.isdecimal()                   # Checks for [0-9].
+<bool> = <str>.isdigit()                     # Checks for [²³¹] and isdecimal().
+<bool> = <str>.isnumeric()                   # Checks for [¼½¾] and isdigit().
+<bool> = <str>.isalnum()                     # Checks for [a-zA-Z] and isnumeric().
+<bool> = <str>.isprintable()                 # Checks for [ !#$%…] and isalnum().
+<bool> = <str>.isspace()                     # Checks for [ \t\n\r\f\v\x1c-\x1f\x85\xa0…].
 
-
    -
  • 'isspace()' checks for whitespaces: '[ \t\n\r\f\v\x1c-\x1f\x85\xa0\u1680…]'.
  • -
-

#Regex

import re
-<str>   = re.sub(<regex>, new, text, count=0)  # Substitutes all occurrences with 'new'.
+

#Regex

Functions for regular expression matching.

import re
+
+ + +
<str>   = re.sub(<regex>, new, text, count=0)  # Substitutes all occurrences with 'new'.
 <list>  = re.findall(<regex>, text)            # Returns all occurrences as strings.
 <list>  = re.split(<regex>, text, maxsplit=0)  # Add brackets around regex to include matches.
-<Match> = re.search(<regex>, text)             # Searches for first occurrence of the pattern.
+<Match> = re.search(<regex>, text)             # First occurrence of the pattern or None.
 <Match> = re.match(<regex>, text)              # Searches only at the beginning of the text.
 <iter>  = re.finditer(<regex>, text)           # Returns all occurrences as Match objects.
-
- +
  • Argument 'new' can be a function that accepts a Match object and returns a string.
  • -
  • Search() and match() return None if they can't find a match.
  • Argument 'flags=re.IGNORECASE' can be used with all functions.
  • Argument 'flags=re.MULTILINE' makes '^' and '$' match the start/end of each line.
  • Argument 'flags=re.DOTALL' makes '.' also accept the '\n'.
  • @@ -2929,7 +2925,7 @@ $ deactivate # Deactivates the activ