From a0cf5a6f0364cc771672916045b9b2d3a9fe7a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jure=20=C5=A0orn?= Date: Thu, 4 Jan 2024 14:33:21 +0100 Subject: [PATCH] Regex --- README.md | 10 +++++----- index.html | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index e289592..6df5cc0 100644 --- a/README.md +++ b/README.md @@ -370,7 +370,7 @@ import re ### Match Object ```python = .group() # Returns the whole match. Also group(0). - = .group(1) # Returns the part inside first brackets. + = .group(1) # Returns part inside the first brackets. = .groups() # Returns all bracketed parts. = .start() # Returns start index of the match. = .end() # Returns exclusive end index of the match. @@ -378,13 +378,13 @@ import re ### Special Sequences ```python -'\d' == '[0-9]' # Matches decimal characters. -'\w' == '[a-zA-Z0-9_]' # Matches alphanumerics and underscore. -'\s' == '[ \t\n\r\f\v]' # Matches whitespaces. +'\d' == '[0-9]' # Also [०-९…]. Matches a decimal character. +'\w' == '[a-zA-Z0-9_]' # Also [ª²³…]. Matches an alphanumeric or _. +'\s' == '[ \t\n\r\f\v]' # Also [\x1c-\x1f…]. Matches a whitespace. ``` * **By default, decimal characters, alphanumerics and whitespaces from all alphabets are matched unless `'flags=re.ASCII'` argument is used.** -* **As shown above, it restricts all special sequence matches to the first 128 characters and prevents `'\s'` from accepting `'[\x1c-\x1f]'` (the so-called separator characters).** +* **It restricts special sequence matches to `'[\x00-\x7f]'` (the first 128 characters) and also prevents `'\s'` from accepting `'[\x1c-\x1f]'` (the so-called separator characters).** * **Use a capital letter for negation (all non-ASCII characters will be matched when used in combination with ASCII flag).** diff --git a/index.html b/index.html index e0fab42..d4733d3 100644 --- a/index.html +++ b/index.html @@ -344,20 +344,20 @@ Point(x=1, y=2
  • 're.compile(<regex>)' returns a Pattern object with the listed methods.
  • Match Object

    <str>   = <Match>.group()                      # Returns the whole match. Also group(0).
    -<str>   = <Match>.group(1)                     # Returns the part inside first brackets.
    +<str>   = <Match>.group(1)                     # Returns part inside the first brackets.
     <tuple> = <Match>.groups()                     # Returns all bracketed parts.
     <int>   = <Match>.start()                      # Returns start index of the match.
     <int>   = <Match>.end()                        # Returns exclusive end index of the match.
     
    -

    Special Sequences

    '\d' == '[0-9]'                                # Matches decimal characters.
    -'\w' == '[a-zA-Z0-9_]'                         # Matches alphanumerics and underscore.
    -'\s' == '[ \t\n\r\f\v]'                        # Matches whitespaces.
    +

    Special Sequences

    '\d' == '[0-9]'                                # Also [०-९…]. Matches a decimal character.
    +'\w' == '[a-zA-Z0-9_]'                         # Also [ª²³…]. Matches an alphanumeric or _.
    +'\s' == '[ \t\n\r\f\v]'                        # Also [\x1c-\x1f…]. Matches a whitespace.
     
    • By default, decimal characters, alphanumerics and whitespace characters from all alphabets are matched unless the 'flags=re.ASCII' argument is used.
    • -
    • As shown above, it restricts all special sequence matches to the first 128 characters and prevents '\s' from accepting '[\x1c-\x1f]' (the so-called separator characters).
    • +
    • It restricts special sequence matches to '[\x00-\x7f]' (the first 128 characters) and also prevents '\s' from accepting '[\x1c-\x1f]' (the so-called separator characters).
    • Use a capital letter for negation (all non-ASCII characters will be matched when used in combination with the ASCII flag).

    #Format

    <str> = f'{<el_1>}, {<el_2>}'            # Curly brackets can also contain expressions.