update

2024-12-30 18:21:27 +03:00 · 2024-12-30 18:21:27 +03:00 · c44e7fcd3c
commit c44e7fcd3c
parent 6837e4f0be
1 changed files with 94 additions and 7 deletions
--- a/module3.ipynb
+++ b/module3.ipynb
@ -464,13 +464,13 @@
   "outputs": [],
   "source": [
    "import re\n",
-    "print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\"))\n",
-    "print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n",
-    "print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n",
-    "re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\")\n",
-    "print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\"))\n",
-    "print(re.findall(r\"\\w{5,}\", \"I really like strawberries\"))\n",
-    "print(re.search(r\"s\\w{,20}\", \"I really like strawberries\"))\n",
+    "print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\")) # This line searches for any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a ghost\". It will find a match for 'ghost' (exactly 5 letters) and return it as a Match object with span=(2, 7).\n",
+    "print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line searches for any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\". It will find a match for 'scary' and return it as a Match object.\n",
+    "print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\". Because the pattern is not anchored to word boundaries, it also matches the first 5 letters of 'appeared', so it returns the list of strings ['scary', 'ghost', 'appea'].\n",
+    "re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\") # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 that are separate words in the string \"A scary ghost appeared\". It will find matches for 'scary' and 'ghost' because they are standalone 5-letter words ('A' and 'appeared' have the wrong length), so it evaluates to ['scary', 'ghost']. Note that this result is not printed, because the call is not wrapped in print().\n",
+    "print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\")) # This line finds all (non-overlapping) occurrences of a run of word characters (letters, digits or underscore) that is between 5 and 10 characters long in the string \"I really like strawberries\". It will find matches for 'really' and for the first 10 characters of 'strawberries' (the word has 12 characters, so the match stops at the upper limit), returning them as a list of strings ['really', 'strawberri'].\n",
+    "print(re.findall(r\"\\w{5,}\", \"I really like strawberries\")) # This line finds all (non-overlapping) occurrences of a word composed of alphanumeric characters that is at least 5 characters long in the string \"I really like strawberries\". It will find matches for 'really' and 'strawberries', returning them as a list of strings ['really', 'strawberries'].\n",
+    "print(re.search(r\"s\\w{,20}\", \"I really like strawberries\")) # This line searches for the first occurrence of the letter s followed by at most 20 (possibly zero) word characters in the string \"I really like strawberries\"; the pattern is not anchored to the start of a word. It will find a match for 'strawberries' because it starts with 's' and is followed by fewer than or equal to 20 word characters, returning it as a Match object with span=(14, 26).\n",
    "\n",
    "# Output:\n",
    "# <re.Match object; span=(2, 7), match='ghost'>\n",
@ -480,6 +480,93 @@
    "# ['really', 'strawberries']\n",
    "# <re.Match object; span=(14, 26), match='strawberries'>"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extracting a PID using regexes in Python\n",
+    "import re\n",
+    "log = \"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\"\n",
+    "regex = r\"\\[(\\d+)\\]\"\n",
+    "result = re.search(regex, log)\n",
+    "result = re.search(regex, \"A completely different string that also has numbers [34567]\")\n",
+    "result = re.search(regex, \"99 elephants in a [cage]\")\n",
+    "def extract_pid(log_line):\n",
+    "    regex = r\"\\[(\\d+)\\]\"\n",
+    "    result = re.search(regex, log_line)\n",
+    "    if result is None:\n",
+    "        return \"\"\n",
+    "    return result[1]\n",
+    "print(extract_pid(log))\n",
+    "print(extract_pid(\"99 elephants in a [cage]\"))\n",
+    "# Output:\n",
+    "# 12345"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "def extract_pid(log_line):\n",
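+    "    regex = r\"\\[(\\d+)\\]: (\\w+)\"   # Regex extended to also capture the word that follows the process ID and colon (the uppercase message in these logs; note the group accepts any word characters, not only uppercase)\n",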
+    "    result = re.search(regex, log_line)\n",
+    "    if result is None:\n",
+    "        return None\n",
+    "    pid = result.groups()[0]  # Fetch the first group (process id)\n",
+    "    message = result.groups()[1] # fetch the second group (uppercase message)\n",
+    "    return \"{} ({})\".format(pid, message) \n",
+    "\n",
+    "print(extract_pid(\"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\")) # 12345 (ERROR)\n",
+    "print(extract_pid(\"99 elephants in a [cage]\")) # None\n",
+    "print(extract_pid(\"A string that also has numbers [34567] but no uppercase message\")) # None\n",
+    "print(extract_pid(\"July 31 08:08:08 mycomputer new_process[67890]: RUNNING Performing backup\")) # 67890 (RUNNING)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Splitting and replacing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "print(re.split(r\"[.?!]\", \"One sentence. Another one? And the last one!\"))\n",
+    "print(re.split(r\"([.?!])\", \"One sentence. Another one? And the last one!\"))\n",
+    "print(re.sub(r\"[\\w.%+-]+@[\\w.-]+\", \"[REDACTED]\", \"Received an email for go_nuts95@my.example.com\"))\n",
+    "print(re.sub(r\"^([\\w .-]*), ([\\w .-]*)$\", r\"\\2 \\1\", \"Lovelace, Ada\"))\n",
+    "\n",
+    "# Output:\n",
+    "# ['One sentence', ' Another one', ' And the last one', '']\n",
+    "# ['One sentence', '.', ' Another one', '?', ' And the last one', '!', '']\n",
+    "# Received an email for [REDACTED]\n",
+    "# Ada Lovelace"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "----"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {