From 794a3599aa930730a79ca2bebc7b589df5f362c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jure=20=C5=A0orn?=
Date: Tue, 12 Mar 2024 06:21:49 +0100
Subject: [PATCH] Regex

---
 README.md  | 33 ++++++++++++++++-----------------
 index.html | 37 ++++++++++++++++++-------------------
 2 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/README.md b/README.md
index 5031fd1..a5e167f 100644
--- a/README.md
+++ b/README.md
@@ -351,36 +351,35 @@ Regex
 
 ```python
 import re
-<str>   = re.sub(<regex>, new, text, count=0)  # Substitutes all occurrences with 'new'.
-<list>  = re.findall(<regex>, text)            # Returns all occurrences as strings.
-<list>  = re.split(<regex>, text, maxsplit=0)  # Add brackets around regex to include matches.
-<Match> = re.search(<regex>, text)             # First occurrence of the pattern or None.
-<Match> = re.match(<regex>, text)              # Searches only at the beginning of the text.
-<iter>  = re.finditer(<regex>, text)           # Returns all occurrences as Match objects.
+<str>   = re.sub(r'<regex>', new, text, count=0)  # Substitutes all occurrences with 'new'.
+<list>  = re.findall(r'<regex>', text)            # Returns all occurrences as strings.
+<list>  = re.split(r'<regex>', text, maxsplit=0)  # Add brackets around regex to keep matches.
+<Match> = re.search(r'<regex>', text)             # First occurrence of the pattern or None.
+<Match> = re.match(r'<regex>', text)              # Searches only at the beginning of the text.
+<iter>  = re.finditer(r'<regex>', text)           # Returns all occurrences as Match objects.
 ```
 
-* **Argument 'new' can be a function that accepts a Match object and returns a string.**
+* **Raw string literals do not interpret escape sequences, thus enabling us to use regex-specific escape sequences that cause SyntaxWarning in normal string literals.**
+* **Argument 'new' of re.sub() can be a function that accepts a Match object and returns a str.**
 * **Argument `'flags=re.IGNORECASE'` can be used with all functions.**
 * **Argument `'flags=re.MULTILINE'` makes `'^'` and `'$'` match the start/end of each line.**
 * **Argument `'flags=re.DOTALL'` makes `'.'` also accept the `'\n'`.**
-* **Use `r'\1'` or `'\\1'` for backreference (`'\1'` returns a character with octal code 1).**
-* **Add `'?'` after `'*'` and `'+'` to make them non-greedy.**
 * **`'re.compile(<regex>)'` returns a Pattern object with methods sub(), findall(), …**
 
 ### Match Object
 ```python
-<str>   = <Match>.group()                      # Returns the whole match. Also group(0).
-<str>   = <Match>.group(1)                     # Returns part inside the first brackets.
-<tuple> = <Match>.groups()                     # Returns all bracketed parts.
-<int>   = <Match>.start()                      # Returns start index of the match.
-<int>   = <Match>.end()                        # Returns exclusive end index of the match.
+<str>   = <Match>.group()                         # Returns the whole match. Also group(0).
+<str>   = <Match>.group(1)                        # Returns part inside the first brackets.
+<tuple> = <Match>.groups()                        # Returns all bracketed parts.
+<int>   = <Match>.start()                         # Returns start index of the match.
+<int>   = <Match>.end()                           # Returns exclusive end index of the match.
 ```
 
 ### Special Sequences
 ```python
-'\d' == '[0-9]'                                # Also [०-९…]. Matches a decimal character.
-'\w' == '[a-zA-Z0-9_]'                         # Also [ª²³…]. Matches an alphanumeric or _.
-'\s' == '[ \t\n\r\f\v]'                        # Also [\x1c-\x1f…]. Matches a whitespace.
+'\d' == '[0-9]'                                   # Also [०-९…]. Matches a decimal character.
+'\w' == '[a-zA-Z0-9_]'                            # Also [ª²³…]. Matches an alphanumeric or _.
+'\s' == '[ \t\n\r\f\v]'                           # Also [\x1c-\x1f…]. Matches a whitespace.
 ```
 
 * **By default, decimal characters, alphanumerics and whitespaces from all alphabets are matched unless `'flags=re.ASCII'` argument is used.**
diff --git a/index.html b/index.html
index f2849e4..0541e71 100644
--- a/index.html
+++ b/index.html
@@ -54,7 +54,7 @@
- +
@@ -325,34 +325,33 @@ Point(x=1, y=2

#Regex

Functions for regular expression matching.

import re
-<str>   = re.sub(<regex>, new, text, count=0)  # Substitutes all occurrences with 'new'.
-<list>  = re.findall(<regex>, text)            # Returns all occurrences as strings.
-<list>  = re.split(<regex>, text, maxsplit=0)  # Add brackets around regex to include matches.
-<Match> = re.search(<regex>, text)             # First occurrence of the pattern or None.
-<Match> = re.match(<regex>, text)              # Searches only at the beginning of the text.
-<iter>  = re.finditer(<regex>, text)           # Returns all occurrences as Match objects.
+<str>   = re.sub(r'<regex>', new, text, count=0)  # Substitutes all occurrences with 'new'.
+<list>  = re.findall(r'<regex>', text)            # Returns all occurrences as strings.
+<list>  = re.split(r'<regex>', text, maxsplit=0)  # Add brackets around regex to keep matches.
+<Match> = re.search(r'<regex>', text)             # First occurrence of the pattern or None.
+<Match> = re.match(r'<regex>', text)              # Searches only at the beginning of the text.
+<iter>  = re.finditer(r'<regex>', text)           # Returns all occurrences as Match objects.
 
    -
  • Argument 'new' can be a function that accepts a Match object and returns a string.
  • +
  • Raw string literals do not interpret escape sequences, thus enabling us to use regex-specific escape sequences that cause SyntaxWarning in normal string literals.
  • +
  • Argument 'new' of re.sub() can be a function that accepts a Match object and returns a str.
  • Argument 'flags=re.IGNORECASE' can be used with all functions.
  • Argument 'flags=re.MULTILINE' makes '^' and '$' match the start/end of each line.
  • Argument 'flags=re.DOTALL' makes '.' also accept the '\n'.
  • -
  • Use r'\1' or '\\1' for backreference ('\1' returns a character with octal code 1).
  • -
  • Add '?' after '*' and '+' to make them non-greedy.
  • 're.compile(<regex>)' returns a Pattern object with methods sub(), findall(), …
-

Match Object

<str>   = <Match>.group()                      # Returns the whole match. Also group(0).
-<str>   = <Match>.group(1)                     # Returns part inside the first brackets.
-<tuple> = <Match>.groups()                     # Returns all bracketed parts.
-<int>   = <Match>.start()                      # Returns start index of the match.
-<int>   = <Match>.end()                        # Returns exclusive end index of the match.
+

Match Object

<str>   = <Match>.group()                         # Returns the whole match. Also group(0).
+<str>   = <Match>.group(1)                        # Returns part inside the first brackets.
+<tuple> = <Match>.groups()                        # Returns all bracketed parts.
+<int>   = <Match>.start()                         # Returns start index of the match.
+<int>   = <Match>.end()                           # Returns exclusive end index of the match.
 
-

Special Sequences

'\d' == '[0-9]'                                # Also [०-९…]. Matches a decimal character.
-'\w' == '[a-zA-Z0-9_]'                         # Also [ª²³…]. Matches an alphanumeric or _.
-'\s' == '[ \t\n\r\f\v]'                        # Also [\x1c-\x1f…]. Matches a whitespace.
+

Special Sequences

'\d' == '[0-9]'                                   # Also [०-९…]. Matches a decimal character.
+'\w' == '[a-zA-Z0-9_]'                            # Also [ª²³…]. Matches an alphanumeric or _.
+'\s' == '[ \t\n\r\f\v]'                           # Also [\x1c-\x1f…]. Matches a whitespace.
 
    @@ -2934,7 +2933,7 @@ $ deactivate # Deactivates the activ