This commit is contained in:
Yavuz Sava 2024-12-30 18:21:27 +03:00
parent 6837e4f0be
commit c44e7fcd3c

View File

@ -464,13 +464,13 @@
"outputs": [],
"source": [
"import re\n",
"print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\"))\n",
"print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n",
"print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\"))\n",
"re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\")\n",
"print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\"))\n",
"print(re.findall(r\"\\w{5,}\", \"I really like strawberries\"))\n",
"print(re.search(r\"s\\w{,20}\", \"I really like strawberries\"))\n",
"print(re.search(r\"[a-zA-Z]{5}\", \"a ghost\")) # This line searches for the first run of 5 consecutive alphabetic characters (a-z or A-Z) in the string \"a ghost\". The word 'ghost' is exactly such a run, so it returns a Match object with span=(2, 7) and match='ghost' (it does not return None).\n",
"print(re.search(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line searches for any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\". It will find a match for 'scary' and return it as a Match object.\n",
"print(re.findall(r\"[a-zA-Z]{5}\", \"a scary ghost appeared\")) # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 in the string \"a scary ghost appeared\". Because the pattern has no word boundaries, it also matches the first five letters of 'appeared', returning the list ['scary', 'ghost', 'appea'].\n",
"re.findall(r\"\\b[a-zA-Z]{5}\\b\", \"A scary ghost appeared\") # This line finds all (non-overlapping) occurrences of any alphabetic character (a-z or A-Z) sequence of length 5 that form a whole word (a word boundary on both sides) in the string \"A scary ghost appeared\". 'scary' and 'ghost' are standalone five-letter words, so the result is ['scary', 'ghost']; 'appeared' is excluded because it is longer than 5 letters. The result is not printed because this call is not wrapped in print().\n",
"print(re.findall(r\"\\w{5,10}\", \"I really like strawberries\")) # This line finds all (non-overlapping) runs of 5 to 10 word characters (letters, digits or underscore) in the string \"I really like strawberries\". 'really' matches in full, but 'strawberries' has 12 characters, so only its first 10 are matched and the remaining 'es' is too short to match on its own. The result is ['really', 'strawberri'].\n",
"print(re.findall(r\"\\w{5,}\", \"I really like strawberries\")) # This line finds all (non-overlapping) occurrences of a word composed of alphanumeric characters that is at least 5 characters long in the string \"I really like strawberries\". It will find matches for 'really' and 'strawberries', returning them as a list of strings ['really', 'strawberries'].\n",
"print(re.search(r\"s\\w{,20}\", \"I really like strawberries\")) # This line searches for the first letter s followed by between 0 and 20 word characters in the string \"I really like strawberries\". The first 's' is the start of 'strawberries', so the whole word is matched. re.search returns a single Match object (span=(14, 26), match='strawberries'), not a list.\n",
"\n",
"# Output:\n",
"# <re.Match object; span=(2, 7), match='ghost'>\n",
@ -480,6 +480,93 @@
"# ['really', 'strawberries']\n",
"# <re.Match object; span=(14, 26), match='strawberries'>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Extracting a PID using regexes in Python\n",
"import re\n",
"log = \"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\"\n",
"regex = r\"\\[(\\d+)\\]\"\n",
"result = re.search(regex, log)\n",
"result = re.search(regex, \"A completely different string that also has numbers [34567]\")\n",
"result = re.search(regex, \"99 elephants in a [cage]\")\n",
"def extract_pid(log_line):\n",
" regex = r\"\\[(\\d+)\\]\"\n",
" result = re.search(regex, log_line)\n",
" if result is None:\n",
" return \"\"\n",
" return result[1]\n",
"print(extract_pid(log))\n",
"print(extract_pid(\"99 elephants in a [cage]\"))\n",
"# Output:\n",
"# 12345"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"def extract_pid(log_line):\n",
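" regex = r\"\\[(\\d+)\\]: (\\w+)\" # Group 1 captures the process ID inside the square brackets; group 2 captures the word that follows the colon and space. Note that \\w+ accepts any word characters (letters, digits, underscore), not only uppercase letters.\n",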
" result = re.search(regex, log_line)\n",
" if result is None:\n",
" return None\n",
" pid = result.groups()[0] # Fetch the first group (process id)\n",
" message = result.groups()[1] # Fetch the second group (the word right after the process ID, e.g. ERROR)\n",
" return \"{} ({})\".format(pid, message) \n",
"\n",
"print(extract_pid(\"July 31 07:51:48 mycomputer bad_process[12345]: ERROR Performing package upgrade\")) # 12345 (ERROR)\n",
"print(extract_pid(\"99 elephants in a [cage]\")) # None\n",
"print(extract_pid(\"A string that also has numbers [34567] but no uppercase message\")) # None\n",
"print(extract_pid(\"July 31 08:08:08 mycomputer new_process[67890]: RUNNING Performing backup\")) # 67890 (RUNNING)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Splitting and replacing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"print(re.split(r\"[.?!]\", \"One sentence. Another one? And the last one!\"))\n",
"print(re.split(r\"([.?!])\", \"One sentence. Another one? And the last one!\"))\n",
"print(re.sub(r\"[\\w.%+-]+@[\\w.-]+\", \"[REDACTED]\", \"Received an email for go_nuts95@my.example.com\"))\n",
"print(re.sub(r\"^([\\w .-]*), ([\\w .-]*)$\", r\"\\2 \\1\", \"Lovelace, Ada\"))\n",
"\n",
"# Output:\n",
"# ['One sentence', ' Another one', ' And the last one', '']\n",
"# ['One sentence', '.', ' Another one', '?', ' And the last one', '!', '']\n",
"# Received an email for [REDACTED]\n",
"# Ada Lovelace"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"----"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {