update

2024-12-27 11:05:07 +03:00 · 2024-12-27 11:05:07 +03:00 · dde17611c6
commit dde17611c6
parent b69f361cf5
1 changed files with 297 additions and 10 deletions
--- a/module3.ipynb
+++ b/module3.ipynb
@ -76,7 +76,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
@ -95,24 +95,311 @@
    }
   ],
   "source": [
-    "import re\n",
+    "import re \n",
+    "\n",
+    "# This function checks whether the text contains an 'a', then exactly one arbitrary character, an 'e', exactly one more arbitrary character, and then an 'i' (the pattern a.e.i). Returns True or False accordingly.\n",
    "def check_aei (text):\n",
    "  result = re.search(r\"a.e.i\", text)\n",
    "  return result != None\n",
    "\n",
-    "print(check_aei(\"academia\")) # True\n",
-    "print(check_aei(\"aerial\")) # False\n",
-    "print(check_aei(\"paramedic\")) # True\n",
+    "print(check_aei(\"academia\")) # This should return: True\n",
+    "print(check_aei(\"aerial\")) # This should return: False\n",
+    "print(check_aei(\"paramedic\")) # This should return: True\n",
    "\n",
+    "# This function checks if a given text contains any punctuation marks (comma, period, colon, semicolon, question mark or exclamation point). Returns True or False accordingly.\n",
    "def check_punctuation (text):\n",
    "  result = re.search(r\"[,.:;?!]\", text)\n",
    "  return result != None\n",
    "\n",
-    "print(check_punctuation(\"This is a sentence that ends with a period.\")) # True\n",
-    "print(check_punctuation(\"This is a sentence fragment without a period\")) # False\n",
-    "print(check_punctuation(\"Aren't regular expressions awesome?\")) # True\n",
-    "print(check_punctuation(\"Wow! We're really picking up some steam now!\")) # True\n",
-    "print(check_punctuation(\"End of the line\")) # False"
+    "print(check_punctuation(\"This is a sentence that ends with a period.\")) # This should return: True\n",
+    "print(check_punctuation(\"This is a sentence fragment without a period\")) # This should return: False\n",
+    "print(check_punctuation(\"Aren't regular expressions awesome?\")) # This should return: True\n",
+    "print(check_punctuation(\"Wow! We're really picking up some steam now!\")) # This should return: True\n",
+    "print(check_punctuation(\"End of the line\")) # This should return: False\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<re.Match object; span=(0, 9), match='Pygmalion'>\n",
+      "<re.Match object; span=(0, 17), match='Python Programmin'>\n",
+      "<re.Match object; span=(0, 6), match='Python'>\n",
+      "<re.Match object; span=(0, 3), match='Pyn'>\n",
+      "<re.Match object; span=(1, 3), match='ol'>\n",
+      "<re.Match object; span=(1, 5), match='ooll'>\n",
+      "None\n",
+      "<re.Match object; span=(3, 7), match='each'>\n",
+      "<re.Match object; span=(7, 12), match='peach'>\n"
+     ]
+    }
+   ],
+   "source": [
+    "import re \n",
+    "print(re.search(r\"Py.*n\", \"Pygmalion\"))\n",
+    "# re.search() returns a match object when it finds the pattern 'Py', followed by any characters, ending in 'n', anywhere in\n",
+    "# the string \"Pygmalion\". The '.' means any character (except newline), and '*' means zero or more repetitions of the\n",
+    "# preceding RE, which here is '.'. So this search looks for 'Py' followed by zero or more characters and then 'n'. Because\n",
+    "# '*' is greedy, it extends to the last 'n', so in \"Pygmalion\" the match is the whole word \"Pygmalion\".\n",
+    "print(re.search(r\"Py.*n\", \"Python Programming\"))\n",
+    "# Here we search the string \"Python Programming\" with the same pattern. Because '.*' is greedy and '.' also matches the\n",
+    "# space, the match runs to the last 'n' in the string, so it returns a match object for \"Python Programmin\", not just \"Python\".\n",
+    "print(re.search(r\"Py[a-z]*n\", \"Python Programming\"))\n",
+    "# Here we use the character class '[a-z]', which matches any lowercase letter. The pattern is 'Py' followed by zero or more\n",
+    "# lowercase letters and then 'n'. The match object is for \"Python\" only: the space after it is not a lowercase letter, so\n",
+    "# the match cannot extend into \"Programming\".\n",
+    "print(re.search(r\"Py[a-z]*n\", \"Pyn\"))\n",
+    "# Here the same pattern ('Py', zero or more lowercase letters, then 'n') is applied to the string \"Pyn\".\n",
+    "# It returns a match object for the whole string \"Pyn\", because '*' also allows zero letters between 'Py' and 'n'.\n",
+    "print(re.search(r\"o+l+\", \"goldfish\"))\n",
+    "# Here we are looking for one or more 'o' followed by one or more 'l'. In the word goldfish, the 'o' is directly followed by\n",
+    "# 'l', so it returns a match object for \"ol\" (span 1-3).\n",
+    "print(re.search(r\"o+l+\", \"woolly\"))\n",
+    "# Here we are looking for one or more 'o' followed by one or more 'l'. In the word woolly there is 'oo' followed by 'll', so\n",
+    "# it returns a match object for \"ooll\" (not the whole word).\n",
+    "print(re.search(r\"o+l+\", \"boil\"))\n",
+    "# Here we are looking for one or more 'o' followed by one or more 'l'. In the word boil, the 'o' is followed by 'i', not\n",
+    "# directly by 'l', so it returns None.\n",
+    "print(re.search(r\"p?each\", \"To each their own\"))\n",
+    "# Here we are looking for an optional 'p' followed by 'each'; the '?' means 'p' can occur zero or one time. In the string\n",
+    "# \"To each their own\" there is no 'p' before 'each', so the match is \"each\"; in \"I like peaches\" below the match is \"peach\".\n",
+    "print(re.search(r\"p?each\", \"I like peaches\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The repeating_letter_a function checks if the text passed includes the letter \"a\" (lowercase or uppercase) at least twice.\n",
+    "# For example, repeating_letter_a(\"banana\") is True, while repeating_letter_a(\"pineapple\") is False. Fill in the code to\n",
+    "# make this work. \n",
+    "import re\n",
+    "def repeating_letter_a(text):\n",
+    "  result = re.search(r\"[Aa].*[Aa]\", text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(repeating_letter_a(\"banana\")) # True\n",
+    "print(repeating_letter_a(\"pineapple\")) # False\n",
+    "print(repeating_letter_a(\"Animal Kingdom\")) # True\n",
+    "print(repeating_letter_a(\"A is for apple\")) # True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "# This line searches \"welcome\" for the pattern '.com'. The unescaped '.' matches any character, so the match is 'lcom'.\n",
+    "print(re.search(r\".com\", \"welcome\")) # <re.Match object; span=(2, 6), match='lcom'>\n",
+    "# This line of code uses regular expressions (regex) to search for any occurrence of '\\.com' in the word \"welcome\". \n",
+    "# The output will be None because there is no '.com' within the string 'welcome'. \n",
+    "# The backslash before '.' in the regex pattern escapes the period, making it match a literal period character rather than any character (as the period itself does in regex).\n",
+    "print(re.search(r\"\\.com\", \"welcome\")) # None\n",
+    "# This line searches \"mydomain.com\" for the escaped pattern '\\.com', which matches the literal text '.com'.\n",
+    "print(re.search(r\"\\.com\", \"mydomain.com\")) # <re.Match object; span=(8, 12), match='.com'>\n",
+    "# Here \\w matches any alphanumeric character or underscore, and '*' means zero or more repetitions, so the match in \"This is an example\" stops at the first space.\n",
+    "print(re.search(r\"\\w*\", \"This is an example\")) # <re.Match object; span=(0, 4), match='This'>\n",
+    "# This line of code uses regular expressions (regex) to search for any occurrence of '\\w*' in the word \"And_this_is_another\". \n",
+    "print(re.search(r\"\\w*\", \"And_this_is_another\")) # <re.Match object; span=(0, 19), match='And_this_is_another'>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fill in the code to check if the text passed has at least 2 groups of alphanumeric characters\n",
+    "# (including letters, numbers, and underscores) separated by one or more whitespace characters.\n",
+    "import re\n",
+    "def check_character_groups(text):\n",
+    "  result = re.search(r\"\\w\\s\\w\", text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(check_character_groups(\"One\")) # False\n",
+    "print(check_character_groups(\"123  Ready Set GO\")) # True\n",
+    "print(check_character_groups(\"username user_01\")) # True\n",
+    "print(check_character_groups(\"shopping_list: milk, bread, eggs.\")) # False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re  # import Python's regular expressions module\n",
+    "# This searches for a pattern in 'Argentina' that starts with an \"A\" and ends with an \"a\". The .* is a wildcard that can match any character (except newline) between A and a. It returns the matching object span.\n",
+    "print(re.search(r\"A.*a\", \"Argentina\"))  # <_sre.SRE_Match object; span=(0, 9), match='Argentina'>\n",
+    "# Same as previous search but in 'Azerbaijan'. It also returns the matching object span.\n",
+    "print(re.search(r\"A.*a\", \"Azerbaijan\"))  # <_sre.SRE_Match object; span=(0, 9), match='Azerbaija'>\n",
+    "# This checks whether the entire string 'Australia' starts with an \"A\" and ends with an \"a\". The ^ anchors the start of the string and $ anchors the end. It returns a match object for the whole word, because 'Australia' both starts with \"A\" and ends with \"a\".\n",
+    "print(re.search(r\"^A.*a$\", \"Australia\"))  # <re.Match object; span=(0, 9), match='Australia'>\n",
+    "# This pattern matches a string that starts with a letter or an underscore (not a digit), followed by any number of letters, digits, or underscores.\n",
+    "pattern = r\"^[a-zA-Z_][a-zA-Z0-9_]*$\"  # valid variable pattern in Python according to the standard conventions\n",
+    "# This searches if \"_this_is_a_valid_variable_name\" matches the pattern. It returns the matching object span because it does match the pattern.\n",
+    "print(re.search(pattern, \"_this_is_a_valid_variable_name\"))  # <_sre.SRE_Match object; span=(0, 30), match='_this_is_a_valid_variable_name'>\n",
+    "# This searches if \"this isn't a valid variable\" matches the pattern. It returns None because it contains space which is not allowed in Python variables according to standard conventions.\n",
+    "print(re.search(pattern, \"this isn't a valid variable\"))  # None\n",
+    "# This searches if \"my_variable1\" matches the pattern. It returns matching object span as it does match the pattern.\n",
+    "print(re.search(pattern, \"my_variable1\")) # <_sre.SRE_Match object; span=(0, 12), match='my_variable1'>\n",
+    "# This searches if \"2my_variable1\" matches the pattern. It returns None because it starts with a digit which is not allowed in Python variables according to standard conventions.\n",
+    "print(re.search(pattern, \"2my_variable1\")) # None\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fill in the code to check if the text passed looks like a standard sentence, meaning that it starts with an\n",
+    "# uppercase letter, followed by at least some lowercase letters or a space, and ends with a period, question\n",
+    "# mark, or exclamation point. \n",
+    "import re\n",
+    "def check_sentence(text):\n",
+    "  result = re.search(r\"^[A-Z][a-z\\s].*[\\.?!]$\", text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(check_sentence(\"Is this is a sentence?\")) # True\n",
+    "print(check_sentence(\"is this is a sentence?\")) # False\n",
+    "print(check_sentence(\"Hello\")) # False\n",
+    "print(check_sentence(\"1-2-3-GO!\")) # False\n",
+    "print(check_sentence(\"A star is born.\")) # True"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- `r\"\\d{3}-\\d{3}-\\d{4}\"` matches U.S. phone numbers in the format 111-222-3333.\n",
+    "- `r\"^-?\\d*(\\.\\d+)?$\"` matches any positive or negative number, with or without decimal places.\n",
+    "- `r\"^(.+)\\/([^\\/]+)\\/\"` matches any path and filename, capturing the leading path and the last slash-terminated component as groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The check_web_address() function checks if the text passed qualifies as a top-level web address,\n",
+    "# meaning that it contains alphanumeric characters (which includes letters, numbers, and underscores),\n",
+    "# as well as periods, dashes, and a plus sign, followed by a period and a character-only top-level\n",
+    "# domain such as \".com\", \".info\", \".edu\", etc. Fill in the regular expression to do that, using escape\n",
+    "# characters, wildcards, repetition qualifiers, beginning and end-of-line characters, and character classes.\n",
+    "import re\n",
+    "def check_web_address(text):\n",
+    "  pattern = r\"^[A-Za-z0-9_.-]*\\.[A-Za-z]+$\"\n",
+    "  result = re.search(pattern, text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(check_web_address(\"gmail.com\")) # True\n",
+    "print(check_web_address(\"www@google\")) # False\n",
+    "print(check_web_address(\"www.Coursera.org\")) # True\n",
+    "print(check_web_address(\"web-address.com/homepage\")) # False\n",
+    "print(check_web_address(\"My_Favorite-Blog.US\")) # True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "def check_time(text):\n",
+    "  pattern = r\"[0-9][:][0-5][0-9]\\s?(am|AM|pm|PM)\"\n",
+    "  result = re.search(pattern, text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(check_time(\"12:45pm\")) # True\n",
+    "print(check_time(\"9:59 AM\")) # True\n",
+    "print(check_time(\"6:60am\")) # False\n",
+    "print(check_time(\"five o'clock\")) # False\n",
+    "print(check_time(\"6:02 am\")) # True\n",
+    "print(check_time(\"6:02km\")) # False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "def contains_acronym(text):\n",
+    "  pattern = r\".*\\([A-Za-z0-9]+\\).*\"\n",
+    "  result = re.search(pattern, text)\n",
+    "  return result != None\n",
+    "\n",
+    "print(contains_acronym(\"Instant messaging (IM) is a set of communication technologies used for text-based communication\")) # True\n",
+    "print(contains_acronym(\"American Standard Code for Information Interchange (ASCII) is a character encoding standard for electronic communication\")) # True\n",
+    "print(contains_acronym(\"Please do NOT enter without permission!\")) # False\n",
+    "print(contains_acronym(\"PostScript is a fourth-generation programming language (4GL)\")) # True\n",
+    "print(contains_acronym(\"Have fun using a self-contained underwater breathing apparatus (Scuba)!\")) # True"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n",
+      "False\n",
+      "True\n",
+      "False\n"
+     ]
+    }
+   ],
+   "source": [
+    "import re\n",
+    "def check_zip_code (text):\n",
+    " result = re.search(r\"^.*\\s(\\d{5})(-\\d{4})?.*$\", text)\n",
+    " return result != None\n",
+    "\n",
+    "print(check_zip_code(\"The zip codes for New York are 10001 thru 11104.\")) # True\n",
+    "print(check_zip_code(\"90210 is a TV show\")) # False\n",
+    "print(check_zip_code(\"Their address is: 123 Main Street, Anytown, AZ 85258-0001.\")) # True\n",
+    "print(check_zip_code(\"The Parliament of Canada is at 111 Wellington St, Ottawa, ON K1A0A9.\")) # False"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Advanced Matching"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import re\n",
+    "# The regex matches a string that starts with a word (group 1), followed by a comma and a space,\n",
+    "# and ends with another word (group 2).\n",
+    "result = re.search(r\"^(\\w*), (\\w*)$\", \"Lovelace, Ada\")\n",
+    "print(result)\n",
+    "print(result.groups())\n",
+    "print(result[0])\n",
+    "print(result[1])\n",
+    "print(result[2])\n",
+    "\"{} {}\".format(result[2], result[1])"
   ]
  },
  {