update
This commit is contained in:
parent
1434973248
commit
7f02975103
231
module4.ipynb
231
module4.ipynb
@ -506,6 +506,237 @@
|
||||
"\n",
|
||||
"The process runs in the background while the script continues with other tasks (in this case, simply waiting for a couple of seconds). Then the script checks whether the process is still running. Here the check happens after a 2-second sleep, but the command started by Popen sleeps for 5 seconds, so the program confirms that the subprocess has not finished running."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Processing Log Files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Filtering log files with regular expressions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!/usr/bin/env python3\n",
|
||||
"import sys\n",
|
||||
"logfile = sys.argv[1]\n",
|
||||
"with open(logfile) as f:\n",
|
||||
" for line in f:\n",
|
||||
" print(line.strip())\n",
|
||||
"\n",
|
||||
"#!/usr/bin/env python3\n",
|
||||
"import sys\n",
|
||||
"logfile = sys.argv[1]\n",
|
||||
"with open(logfile) as f:\n",
|
||||
" for line in f:\n",
|
||||
" if \"CRON\" not in line:\n",
|
||||
" continue\n",
|
||||
" print(line.strip())\n",
|
||||
"\n",
|
||||
"import re\n",
|
||||
"pattern = r\"USER \\((\\w+)\\)$\"\n",
|
||||
"line = \"Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)\"\n",
|
||||
"result = re.search(pattern, line)\n",
|
||||
"print(result[1])\n",
|
||||
"\n",
|
||||
"#!/usr/bin/env python3\n",
|
||||
"import re\n",
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"logfile = sys.argv[1]\n",
|
||||
"with open(logfile) as f:\n",
|
||||
" for line in f:\n",
|
||||
" if \"CRON\" not in line:\n",
|
||||
" continue\n",
|
||||
" pattern = r\"USER \\((.+)\\)$\"\n",
|
||||
" result = re.search(pattern, line)\n",
|
||||
" print(result[1])\n",
|
||||
"\n",
|
||||
"chmod +x check_cron.py \n",
|
||||
"./check_cron.py syslog "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We're using the same syslog, and we want to display the date, time, and process id that's inside the square brackets. \n",
|
||||
"# We can read each line of the syslog and pass the contents to the show_time_of_pid function. Fill in the gaps to extract the date, \n",
|
||||
"# time, and process id from the passed line, and return this format: Jul 6 14:01:23 pid:29440.\n",
|
||||
"import re\n",
|
||||
"def show_time_of_pid(line):\n",
|
||||
" pattern = r\"(\\w+ \\d+ \\d+:\\d+:\\d+) .*?\\[(\\d+)\\]\"\n",
|
||||
" result = re.search(pattern, line)\n",
|
||||
" return result[1] + \" pid:\" + result[2]\n",
|
||||
"\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)\")) # Jul 6 14:01:23 pid:29440\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:02:08 computer.name jam_tag=psim[29187]: (UUID:006)\")) # Jul 6 14:02:08 pid:29187\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:02:09 computer.name jam_tag=psim[29187]: (UUID:007)\")) # Jul 6 14:02:09 pid:29187\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:03:01 pid:29440\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:03:40 computer.name cacheclient[29807]: start syncing from \\\"0xDEADBEEF\\\"\")) # Jul 6 14:03:40 pid:29807\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:04:01 pid:29440\n",
|
||||
"print(show_time_of_pid(\"Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:05:01 pid:29440"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Making sense out of the data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"usernames = {}\n",
|
||||
"name = \"good_user\"\n",
|
||||
"usernames[name] = usernames.get(name, 0) + 1\n",
|
||||
"print(usernames)\n",
|
||||
"usernames[name] = usernames.get(name, 0) + 1\n",
|
||||
"print(usernames)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!/usr/bin/env python3\n",
|
||||
"import re\n",
|
||||
"import sys\n",
|
||||
"\n",
|
||||
"logfile = sys.argv[1]\n",
|
||||
"usernames = {}\n",
|
||||
"with open(logfile) as f:\n",
|
||||
" for line in f:\n",
|
||||
" if \"CRON\" not in line:\n",
|
||||
" continue\n",
|
||||
" pattern = r\"USER \\((\\w+)\\)$\"\n",
|
||||
" result = re.search(pattern, line)\n",
|
||||
"\n",
|
||||
" if result is None:\n",
|
||||
" continue\n",
|
||||
" name = result[1]\n",
|
||||
" usernames[name] = usernames.get(name, 0) + 1\n",
|
||||
"\n",
|
||||
"print(usernames)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Terms and definitions from course 2, module 4\n",
|
||||
"\n",
|
||||
"**Bash:** The most commonly used shell on Linux\n",
|
||||
"\n",
|
||||
"**Command line arguments:** Inputs provided to a program when running it from the command line\n",
|
||||
"\n",
|
||||
"**Environment variables:** Settings and data stored outside a program that can be accessed by it to alter how the program behaves in a particular environment\n",
|
||||
"\n",
|
||||
"**Input / Output (I/O):** These streams are the basic mechanism for performing input and output operations in your programs\n",
|
||||
"\n",
|
||||
"**Log files:** Log files are records or text files that store a history of events, actions, or errors generated by a computer system, software, or application for diagnostic, troubleshooting, or auditing purposes\n",
|
||||
"\n",
|
||||
"**Standard input stream (STDIN):** A channel between a program and a source of input\n",
|
||||
"\n",
|
||||
"**Standard output stream (STDOUT):** A pathway between a program and a target of output, like a display\n",
|
||||
"\n",
|
||||
"**Standard error (STDERR):** This displays output like standard out, but is used specifically as a channel to show error messages and diagnostics from the program\n",
|
||||
"\n",
|
||||
"**Shell:** The application that reads and executes all commands \n",
|
||||
"\n",
|
||||
"**Subprocesses:** A process to call and run other applications from within Python, including other Python scripts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exemplar: Work with log files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# View log file\n",
|
||||
"# Log line format: Month Day hour:minute:second mycomputername \"process_name\"[\"random 5 digit number\"] \"ERROR/INFO/WARN\" \"Error description\"\n",
|
||||
"cat ~/data/fishy.log\n",
|
||||
"\n",
|
||||
"July 31 00:06:21 mycomputername kernel[96041]: WARN Failed to start network connection\n",
|
||||
"July 31 00:09:53 mycomputername updater[46711]: WARN Computer needs to be turned off and on again\n",
|
||||
"July 31 00:12:36 mycomputername kernel[48462]: INFO Successfully connected\n",
|
||||
"July 31 00:13:52 mycomputername updater[43530]: ERROR Error running Python2.exe: Segmentation Fault (core dumped)\n",
|
||||
"July 31 00:16:13 mycomputername NetworkManager[63902]: WARN Failed to start application install\n",
|
||||
"July 31 00:26:45 mycomputername CRON[83063]: INFO I'm sorry Dave. I'm afraid I can't do that\n",
|
||||
"July 31 00:27:56 mycomputername cacheclient[75746]: WARN PC Load Letter\n",
|
||||
"July 31 00:33:31 mycomputername system[25588]: ERROR Out of yellow ink, specifically, even though you want grayscale\n",
|
||||
"July 31 00:36:55 mycomputername updater[73786]: WARN Packet loss\n",
|
||||
"July 31 00:37:38 mycomputername dhcpclient[87602]: INFO Googling the answer\n",
|
||||
"July 31 00:37:48 mycomputername utility[21449]: ERROR The cake is a lie!\n",
|
||||
"July 31 00:44:50 mycomputername kernel[63793]: ERROR Failed process [13966]\n",
|
||||
"\n",
|
||||
"# Find an error\n",
|
||||
"cd ~/scripts\n",
|
||||
"nano find_error.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!/usr/bin/env python3\n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def error_search(log_file):\n",
|
||||
" error = input(\"What is the error?\")\n",
|
||||
" returned_errors = []\n",
|
||||
" with open(log_file, mode='r',encoding='UTF-8') as file:\n",
|
||||
" for log in file.readlines():\n",
|
||||
" error_patterns = [\"error\"]\n",
|
||||
" for i in range(len(error.split(' '))):\n",
|
||||
" error_patterns.append(r\"{}\".format(error.split(' ')[i].lower()))\n",
|
||||
" if all(re.search(error_pattern, log.lower()) for error_pattern in error_patterns):\n",
|
||||
" returned_errors.append(log)\n",
|
||||
" file.close()\n",
|
||||
" return returned_errors\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def file_output(returned_errors):\n",
|
||||
" with open(os.path.expanduser('~') + '/data/errors_found.log', 'w') as file:\n",
|
||||
" for error in returned_errors:\n",
|
||||
" file.write(error)\n",
|
||||
" file.close()\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" log_file = sys.argv[1]\n",
|
||||
" returned_errors = error_search(log_file)\n",
|
||||
" file_output(returned_errors)\n",
|
||||
" sys.exit(0)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
Loading…
Reference in New Issue
Block a user