From c44e7fcd3cd0e8c475e5cb84cea63a2b4da7fc52 Mon Sep 17 00:00:00 2001 From: Woose Date: Mon, 30 Dec 2024 18:21:27 +0300 Subject: [PATCH] update --- module3.ipynb | 101 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 7 deletions(-) diff --git a/module3.ipynb b/module3.ipynb index c61ef32..d6f9089 100644 --- a/module3.ipynb +++ b/module3.ipynb @@ -464,13 +464,13 @@ "outputs": [], "source": [ "import re\n", - "print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\"))\n", - "print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n", - "print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n", - "re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\")\n", - "print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\"))\n", - "print(re.findall(r\"\\w{5,}\", \"I really like strawberries\"))\n", - "print(re.search(r\"s\\w{,20}\", \"I really like strawberries\"))\n", + "print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\")) # This line searches for any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a ghost\". It will find a match for 'ghost', which is exactly five letters long, and return it as a Match object.\n", + "print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line searches for any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\". It will find a match for 'scary' and return it as a Match object.\n", + "print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\".
It will find matches for 'scary', 'ghost' and 'appea' (the first five letters of 'appeared'; the remaining 'red' is too short to match), returning them as a list of strings ['scary', 'ghost', 'appea'].\n", + "re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\") # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 that are separate words in the string \"A scary ghost appeared\". It will find matches for 'scary' and 'ghost' because each is a standalone five-letter word, but not for 'A' or 'appeared', so it returns ['scary', 'ghost'].\n", + "print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\")) # This line finds all (non-overlapping) occurrences of a run of word characters (letters, digits or underscores) that is between 5 and 10 characters long in the string \"I really like strawberries\". It will find matches for 'really' and 'strawberri' (only the first 10 characters of 'strawberries'; the leftover 'es' is too short to match), returning them as a list of strings ['really', 'strawberri'].\n", + "print(re.findall(r\"\\w{5,}\", \"I really like strawberries\")) # This line finds all (non-overlapping) occurrences of a run of word characters (letters, digits or underscores) that is at least 5 characters long in the string \"I really like strawberries\". It will find matches for 'really' and 'strawberries', returning them as a list of strings ['really', 'strawberries'].\n", + "print(re.search(r\"s\\w{,20}\", \"I really like strawberries\")) # This line searches for the letter s followed by at most 20 word characters (letters, digits or underscores) in the string \"I really like strawberries\".
It will find a match for 'strawberries' because it starts with 's' and is followed by at most 20 word characters, returning it as a single Match object (re.search never returns a list).\n", "\n", "# Output:\n", "# \n", @@ -480,6 +480,93 @@ "# ['really', 'strawberries']\n", "# " ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extracting a PID using regexes in Python\n", + "import re\n", + "log = \"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\"\n", + "regex = r\"\\[(\\d+)\\]\"\n", + "result = re.search(regex, log)\n", + "result = re.search(regex, \"A completely different string that also has numbers [34567]\")\n", + "result = re.search(regex, \"99 elephants in a [cage]\")\n", + "def extract_pid(log_line):\n", + " regex = r\"\\[(\\d+)\\]\"\n", + " result = re.search(regex, log_line)\n", + " if result is None:\n", + " return \"\"\n", + " return result[1]\n", + "print(extract_pid(log))\n", + "print(extract_pid(\"99 elephants in a [cage]\"))\n", + "# Output:\n", + "# 12345" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "def extract_pid(log_line):\n", + " regex = r\"\\[(\\d+)\\]: (\\w+)\" # Modify regex to return uppercase message in parenthesis after process ID\n", + " result = re.search(regex, log_line)\n", + " if result is None:\n", + " return None\n", + " pid = result.groups()[0] # Fetch the first group (process id)\n", + " message = result.groups()[1] # fetch the second group (uppercase message)\n", + " return \"{} ({})\".format(pid, message) \n", + "\n", + "print(extract_pid(\"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\")) # 12345 (ERROR)\n", + "print(extract_pid(\"99 elephants in a [cage]\")) # None\n", + "print(extract_pid(\"A string that also has numbers [34567] but no uppercase message\")) # None\n", + "print(extract_pid(\"July 31
08:08:08 mycomputer new_process[67890]: RUNNING Performing backup\")) # 67890 (RUNNING)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting and replacing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import re\n", + "print(re.split(r\"[.?!]\", \"One sentence. Another one? And the last one!\"))\n", + "print(re.split(r\"([.?!])\", \"One sentence. Another one? And the last one!\"))\n", + "print(re.sub(r\"[\\w.%+-]+@[\\w.-]+\", \"[REDACTED]\", \"Received an email for go_nuts95@my.example.com\"))\n", + "print(re.sub(r\"^([\\w .-]*), ([\\w .-]*)$\", r\"\\2 \\1\", \"Lovelace, Ada\"))\n", + "\n", + "# Output:\n", + "# ['One sentence', ' Another one', ' And the last one', '']\n", + "# ['One sentence', '.', ' Another one', '?', ' And the last one', '!', '']\n", + "# Received an email for [REDACTED]\n", + "# Ada Lovelace" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {