#!/usr/bin/env python3
"""check_cron.py, built up in four steps.

Usage: ./check_cron.py LOGFILE

The shebang must name the `env` binary followed by the interpreter
(`/usr/bin/env python3`); `/bin/env/python3` is not a path that exists, so
the script could not be started with `./check_cron.py`.
"""
import re
import sys

logfile = sys.argv[1]

# Step 1 -- print every line of the log file, without its trailing newline.
with open(logfile) as f:
    for line in f:
        print(line.strip())

# Step 2 -- same loop, but skip every line that does not mention CRON.
with open(logfile) as f:
    for line in f:
        if "CRON" not in line:
            continue
        print(line.strip())

# Step 3 -- try the regular expression on one sample line first.
# The capture group holds the word between the parentheses after "USER ".
pattern = r"USER \((\w+)\)$"
line = "Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)"
result = re.search(pattern, line)
print(result[1])

# Step 4 -- final script: print the user name of every CRON "USER (...)" line.
# The pattern does not change between lines, so it is defined once, outside
# the loop.
pattern = r"USER \((.+)\)$"
with open(logfile) as f:
    for line in f:
        if "CRON" not in line:
            continue
        result = re.search(pattern, line)
        if result is None:
            # A CRON line that does not end in "USER (...)": re.search
            # returned None, and subscripting it would raise TypeError.
            continue
        print(result[1])

# Run from a shell (these are shell commands, not Python):
#   chmod +x check_cron.py
#   ./check_cron.py syslog
import re


def show_time_of_pid(line):
    """Extract the date, time and process id from one syslog line.

    Args:
        line: A syslog entry such as
            "Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)".

    Returns:
        A string in the format "Jul 6 14:01:23 pid:29440", or None when the
        line contains no "month day hh:mm:ss" stamp followed by a process id
        in square brackets.
    """
    # Group 1 captures "month day hh:mm:ss"; group 2 captures the digits of
    # the first all-digit [...] token that follows (".*?" is non-greedy).
    pattern = r"(\w+ \d+ \d+:\d+:\d+) .*?\[(\d+)\]"
    result = re.search(pattern, line)
    if result is None:
        # re.search returns None when nothing matches; subscripting that
        # would raise an unhelpful TypeError, so report "no match" instead.
        return None
    return result[1] + " pid:" + result[2]


print(show_time_of_pid("Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)")) # Jul 6 14:01:23 pid:29440
print(show_time_of_pid("Jul 6 14:02:08 computer.name jam_tag=psim[29187]: (UUID:006)")) # Jul 6 14:02:08 pid:29187
print(show_time_of_pid("Jul 6 14:02:09 computer.name jam_tag=psim[29187]: (UUID:007)")) # Jul 6 14:02:09 pid:29187
print(show_time_of_pid("Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:03:01 pid:29440
print(show_time_of_pid("Jul 6 14:03:40 computer.name cacheclient[29807]: start syncing from \"0xDEADBEEF\"")) # Jul 6 14:03:40 pid:29807
print(show_time_of_pid("Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:04:01 pid:29440
print(show_time_of_pid("Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:05:01 pid:29440
Terms and definitions from course 2, module 4

**Bash:** The most commonly used shell on Linux

**Command line arguments:** Inputs provided to a program when running it from the command line

**Environment variables:** Settings and data stored outside a program that can be accessed by it to alter how the program behaves in a particular environment

**Input / Output (I/O):** I/O streams are the basic mechanism for performing input and output operations in your programs

**Log files:** Records or text files that store a history of events, actions, or errors generated by a computer system, software, or application for diagnostic, troubleshooting, or auditing purposes

**Standard input stream (STDIN):** A channel between a program and a source of input

**Standard output stream (STDOUT):** A pathway between a program and a target of output, like a display

**Standard error (STDERR):** This displays output like standard out, but is used specifically as a channel to show error messages and diagnostics from the program

**Shell:** The application that reads and executes all commands

**Subprocesses:** Processes started from within a Python script to call and run other applications, including other Python scripts
#!/usr/bin/env python3
import os
import re
import sys


def error_search(log_file):
    """Ask the user for an error description and return the matching log lines.

    A line matches when, compared case-insensitively, it contains the word
    "error" and every whitespace-separated word the user typed.

    Args:
        log_file: Path of the UTF-8 encoded log file to scan.

    Returns:
        A list of the matching lines, in file order, each still ending with
        its newline.
    """
    error = input("What is the error?")
    # Build the pattern list once rather than once per log line. Each word is
    # escaped so that text copied from the log such as "(core" or "[13966]"
    # is searched for literally instead of being parsed as a regex.
    error_patterns = ["error"]
    error_patterns.extend(re.escape(word) for word in error.lower().split())

    returned_errors = []
    with open(log_file, mode='r', encoding='UTF-8') as file:
        for log in file:
            lowered = log.lower()
            if all(re.search(error_pattern, lowered) for error_pattern in error_patterns):
                returned_errors.append(log)
    # No explicit close(): the with-block already closed the file.
    return returned_errors


def file_output(returned_errors, output_path=None):
    """Write the matched log lines to a file, replacing any previous content.

    Args:
        returned_errors: Iterable of lines to write; each is written as is,
            no newline is added.
        output_path: Destination file. When omitted, the lines go to
            data/errors_found.log under the user's home directory.
    """
    if output_path is None:
        output_path = os.path.expanduser('~') + '/data/errors_found.log'
    # Write with the same encoding the log was read with.
    with open(output_path, 'w', encoding='UTF-8') as file:
        file.writelines(returned_errors)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare IndexError.
        sys.exit("usage: find_error.py LOG_FILE")
    log_file = sys.argv[1]
    returned_errors = error_search(log_file)
    file_output(returned_errors)
    sys.exit(0)