Using_Python_to_Interact_wi.../module4.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reading data interactively"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat hello.py\n",
    "#!/usr/bin/env python3\n",
    "\n",
    "name = input(\"Please enter your name: \")\n",
    "print(\"Hello, \" + name)\n",
    "\n",
    "# ./hello.py \n",
    "# Please enter your name: Roger\n",
    "# #Output will be Hello, Roger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# defining a function called 'to_seconds' which takes hours, minutes and seconds as\n",
    "# inputs and returns their total equivalent in seconds by multiplying each with its\n",
    "# corresponding factor (3600 for hours, 60 for minutes) and then summing them up.\n",
    "def to_seconds(hours, minutes, seconds):\n",
    "    return hours*3600+minutes*60+seconds\n",
    "\n",
    "# Print a welcome message to the user\n",
    "print(\"Welcome to this time converter\")\n",
    "\n",
    "# set 'cont' variable to \"y\" initially for entering into while loop.\n",
    "cont = \"y\"\n",
    "\n",
    "# while condition is true (user wants to continue), keep asking the\n",
    "# number of hours, minutes and seconds from the user and print their total equivalent\n",
    "# in seconds by calling 'to_seconds' function. Ask if they want to do another conversion.\n",
    "# If not, break the loop.\n",
    "while(cont.lower() == \"y\"):\n",
    "    hours = int(input(\"Enter the number of hours: \"))\n",
    "    minutes = int(input(\"Enter the number of minutes: \"))\n",
    "    seconds = int(input(\"Enter the number of seconds: \"))\n",
    "\n",
    "    print(\"That's {} seconds\".format(to_seconds(hours, minutes, seconds)))\n",
    "    print()\n",
    "    cont = input(\"Do you want to do another conversion? [y to continue] \")\n",
    "    \n",
    "# print a goodbye message when user decides to stop the program.\n",
    "print(\"Goodbye!\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Standard streams\n",
    "I/O Streams"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat streams.py\n",
    "#!/usr/bin/env python3\n",
    "\n",
    "data = input(\"This will come from STDIN: \")\n",
    "print(\"Now we write it to STDOUT: \" + data)\n",
    "print(\"Now we generate an error to STDERR: \" + data + 1)\n",
    "\n",
    "./streams.py \n",
    "This will come from STDIN: Python Rocks!\n",
    "Now we write it to STDOUT: Python Rocks!\n",
    "\n",
    "cat greeting.txt \n",
    "Well hello there, STDOUT\n",
    "\n",
    "cat greeting.txt \n",
    "Well hello there, STDOUT\n",
    "\n",
    "ls -z"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Environment variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "HOME: /config\n",
      "SHELL: \n",
      "FRUIT: \n"
     ]
    }
   ],
   "source": [
    "# echo $PATH\n",
    "# /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\n",
    "# cat variables.py\n",
    "#!/usr/bin/env python3\n",
    "import os\n",
    "print(\"HOME: \" + os.environ.get(\"HOME\", \"\"))\n",
    "print(\"SHELL: \" + os.environ.get(\"SHELL\", \"\"))\n",
    "print(\"FRUIT: \" + os.environ.get(\"FRUIT\", \"\"))\n",
    "# ./variables.py\n",
    "# export FRUIT=Pineapple\n",
    "# ./variables.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Command-Line Arguments and Exit Status"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat parameters.py \n",
    "#!/usr/bin/env python3\n",
    "import sys\n",
    "print(sys.argv)\n",
    "\n",
    "./parameters.py\n",
    "['./parameters.py'] \n",
    "\n",
    "./parameters.py one two three\n",
    "['./parameters.py', 'one', 'two', 'three']\n",
    "\n",
    "\n",
    "wc variables.py\n",
    "7 19 174 variables.py \t\n",
    "echo $?\n",
    "0\n",
    "\n",
    "wc notpresent.sh\n",
    "wc: notpresent.sh: No such file or directory\n",
    "echo $?\n",
    "1\n",
    "\n",
    "#!/usr/bin/env python3\n",
    "\n",
    "import os\n",
    "import sys\n",
    "\n",
    "filename = sys.argv[1]\n",
    "\n",
    "if not os.path.exists(filename):\n",
    "    with open(filename, \"w\") as f:\n",
    "        f.write(\"New file created\\n\")\n",
    "else:\n",
    "    print(\"Error, the file {} already exists!\".format(filename))\n",
    "    sys.exit(1)\n",
    "\n",
    "./create_file.py example\n",
    "echo $?\n",
    "0\n",
    "\n",
    "cat example \n",
    "New file created\n",
    "./create_file.py example\n",
    "Error, the file example already exists!\n",
    "echo $?\n",
    "1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Python 2 and Python 3 handle input and raw_input differently.\n",
    "\n",
    "In Python 2\n",
    "\n",
    "- input(x) is roughly the same as eval(raw_input(x))\n",
    "- raw_input() is preferred, unless the author wants to support evaluating string expressions.\n",
    "- eval() is used to evaluate string expressions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ">>> my_number = input('Please Enter a Number: \\n')\n",
    "Please Enter a Number: \n",
    "123 + 1\n",
    ">>> print(my_number)\n",
    "123 + 1\n",
    ">>> type(my_number)\n",
    "<class 'str'>\n",
    "\n",
    "\n",
    ">>> my_number = input('Please Enter a Number: \\n')\n",
    "Please Enter a Number: \n",
    "123 + 1\n",
    ">>> print(my_number)\n",
    "123 + 1\n",
    ">>> eval(my_number)\n",
    "124"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Subprocesses"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Running system commands in Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "subprocess.run([\"date\"])\n",
    "subprocess.run([\"sleep\", \"2\"])\n",
    "result = subprocess.run([\"ls\", \"this_file_does_not_exist\"])\n",
    "print(result.returncode)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Obtaining the output of a system command"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = subprocess.run([\"host\", \"8.8.8.8\"], capture_output=True)\n",
    "\n",
    "result = subprocess.run([\"host\", \"8.8.8.8\"], capture_output=True)\n",
    "print(result.returncode)\n",
    "\n",
    "result = subprocess.run([\"host\", \"8.8.8.8\"], capture_output=True)\n",
    "print(result.stdout)\n",
    "\n",
    "result = subprocess.run([\"host\", \"8.8.8.8\"], capture_output=True)\n",
    "print(result.stdout.decode().split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "result = subprocess.run([\"rm\", \"does_not_exist\"], capture_output=True)\n",
    "\n",
    "import subprocess\n",
    "result = subprocess.run([\"rm\", \"does_not_exist\"], capture_output=True)\n",
    "print(result.returncode)\n",
    "\n",
    "import subprocess\n",
    "result = subprocess.run([\"rm\", \"does_not_exist\"], capture_output=True)\n",
    "print(result.returncode)\n",
    "print(result.stdout)\n",
    "print(result.stderr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced subprocess management"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "\n",
    "my_env = os.environ.copy()\n",
    "my_env[\"PATH\"] = os.pathsep.join([\"/opt/myapp/\", my_env[\"PATH\"]])\n",
    "\n",
    "result = subprocess.run([\"myapp\"], env=my_env)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Python subprocesses**\n",
    "\n",
    "In Python, there are usually a lot of different ways to accomplish the same task. Some are easier to write, some are better suited to a given task, and some have a lower overhead in terms of the amount of computing power used. Subprocesses are a way to call and run other applications from within Python, including other Python scripts. In Python, the subprocess module can run new codes and applications by launching the new processes from the Python program. Because subprocess allows you to spawn new processes, it is a very useful way to run multiple processes in parallel instead of sequentially.\n",
    "\n",
    "Python subprocess can launch processes to: \n",
    "\tOpen multiple data files in a folder simultaneously. \n",
    "\tRun external programs. \n",
    "\tConnect to input, output, and error pipes and get return codes.\n",
    "\n",
    "Comparing subprocess to OS and Pathlib\n",
    "Again, Python has multiple ways to achieve most tasks; subprocess is extremely powerful, as it allows you to do anything you would from Python in the shell and get information back into Python. But just because you can use subprocess doesn’t always mean you'll want to. \n",
    "\n",
    "Let’s compare subprocess to two of its alternatives: OS, which has been covered in other readings, and Pathlib. For tasks like getting the current working directory or creating a directory, OS and Pathlib are more direct (or “Pythonic,” meaning it uses the language as it was intended). Using subprocess for tasks like these is like using a crowbar to open a nut. It's more heavy-duty and can be overkill for simple operations. \n",
    "\n",
    "As a comparison example, the following commands accomplish the exact same tasks of getting the current working directory. \n",
    "\n",
    "Subprocess: \n",
    "\n",
    "cwd_subprocess = subprocess.check_output(['pwd'], text=True).strip()\n",
    "\n",
    "OS: \n",
    "\n",
    "cwd_os = os.getcwd()\n",
    "\n",
    "Pathlib: \n",
    "\n",
    "cwd_pathlib = Path.cwd()\n",
    "\n",
    "And these following commands accomplish the exact same tasks of creating a directory. \n",
    "\n",
    "Subprocess: \n",
    "\n",
    "subprocess.run(['mkdir', 'test_dir_subprocess2'])\n",
    "\n",
    "OS: \n",
    "\n",
    "os.mkdir('test_dir_os2')\n",
    "\n",
    "Pathlib: \n",
    "\n",
    "test_dir_pathlib2 = Path('test_dir_pathlib2')\n",
    "\n",
    "test_dir_pathlib2.mkdir(exist_ok=True) #Ensures the directory is created only if it doesn't already exist\n",
    "\n",
    "**When to use subprocess**\n",
    "Subprocess is best used when you need to interface with external processes, run complex shell commands, or need precise control over input and output. Subprocess also spawns fewer processes per task than OS, so subprocess can use less compute power. \n",
    "\n",
    "**Other advantages include:**\n",
    "\tSubprocess can run any shell command, providing greater flexibility.\n",
    "\tSubprocess can capture stdout and stderr easily.\n",
    "\n",
    "On the other hand, OS is useful for basic file and directory operations, environment variable management, and when you don't need the object-oriented approach provided by Pathlib. \n",
    "\n",
    "**Other advantages include:**\n",
    "\tOS provides a simple way to interface with the operating system for basic operations.\n",
    "\tOS is part of the standard library, so it's widely available.\n",
    "\n",
    "Finally, Pathlib is most helpful for working extensively with file paths, when you want an object-oriented and intuitive way to handle file system tasks, or when you're working on code where readability and maintainability are crucial. \n",
    "\n",
    "**Other advantages include:**\n",
    "\tPathlib provides an object-oriented approach to handle file system paths.\n",
    "\tCompared to OS, Pathlib is more intuitive for file and directory operations. \n",
    "\tPathlib is more readable for path manipulations.\n",
    "\n",
    "**Where subprocess shines**\n",
    "The basic ways of using subprocess are the .run() and .Popen() methods. There are additional methods, .call(), .check_output(), and .check_call(). Usually, you will just want to use .run() or one of the two check methods when appropriate. However, when spawning parallel processes or communicating between subprocesses, .Popen() has a lot more power!\n",
    "\n",
    "You can think of .run() as the simplest way to run a command—it’s all right there in the name—and .Popen() as the most fully featured way to call external commands. \n",
    "All of the methods, .run(), .call(),  .check_output(), and .check_call() are wrappers around the .Popen() class. \n",
    "\n",
    "Run\n",
    "The .run() command is the recommended approach to invoking subprocesses. It runs the command, waits for it to complete, then returns a CompletedProcess instance that contains information about the process.\n",
    "\n",
    "Using .run() to execute the echo command:\n",
    "\n",
    "result_run = subprocess.run(['echo', 'Hello, World!'], capture_output=True, text=True)\n",
    "\n",
    "result_run.stdout.strip()  # Extracting the stdout and stripping any extra whitespace\n",
    "\n",
    "output:\n",
    "\n",
    "'Hello, World!'\n",
    "\n",
    "Call \n",
    "The call() command runs a command, waits for it to complete, then returns the return code. Call is older and .run() should be used now, but it’s good to see how it works.\n",
    "\n",
    "Using call() to execute the echo command: \n",
    "\n",
    "return_code_call = subprocess.call(['echo', 'Hello from call!'])\n",
    "\n",
    "return_code_call\n",
    "\n",
    "output:\n",
    "\n",
    "0\n",
    "\n",
    "The returned value 0 indicates that the command was executed successfully.\n",
    "\n",
    "Check_call and check_output\n",
    "Use check_call() to receive just the status of a command. Use check_output() to also obtain output. These are good for situations such as file IO, where a file might not exis, or the operation may otherwise fail. \n",
    "\n",
    "The command check_call()is similar to call() but raises a CalledProcessError exception if the command returns a non-zero exit code.\n",
    "\n",
    "Using check_call() to execute the echo command:\n",
    "\n",
    "return_code_check_call = subprocess.check_call(['echo', 'Hello from check_call!'])\n",
    "\n",
    "return_code_check_call\n",
    "\n",
    "output:\n",
    "\n",
    "0\n",
    "\n",
    "The returned value 0 indicates that the command was executed successfully.\n",
    "\n",
    "Using check_output() to execute the echo command:\n",
    "\n",
    "output_check_output = subprocess.check_output(['echo', 'Hello from check_output!'], text=True)\n",
    "\n",
    "output_check_output.strip()  # Extracting the stdout and stripping any extra whitespace\n",
    "\n",
    "output:\n",
    "\n",
    "'Hello from check_output!'\n",
    "\n",
    "Note: Check_output raises a CalledProcessError if the command returns a non-zero exit code. For more on CalledProcessError, see \n",
    "Exceptions\n",
    ".\n",
    "\n",
    "**Popen**\n",
    "Popen() offers more advanced features compared to the previously mentioned functions. It allows you to spawn a new process, connect to its input/output/error pipes, and obtain its return code.\n",
    "\n",
    "Using Popen to execute the echo command:\n",
    "\n",
    "process_popen = subprocess.Popen(['echo', 'Hello from popen!'], stdout=subprocess.PIPE, text=True)\n",
    "\n",
    "output_popen, _ = process_popen.communicate()\n",
    "\n",
    "output_popen.strip()  # Extracting the stdout and stripping any extra whitespace\n",
    "\n",
    "output:\n",
    "\n",
    "'Hello from popen!'\n",
    "\n",
    "**Pro tip**\n",
    "The Popen command is very useful when you need asynchronous behavior and the ability to pipe information between a subprocess and the Python program that ran that subprocess. Imagine you want to start a long-running command in the background and then continue with other tasks in your script. Later on, you want to be able to check if the process has finished. Here’s how you would do that using Popen.\n",
    "\n",
    "import subprocess\n",
    "\n",
    "Using Popen for asynchronous behavior: \n",
    "\n",
    "process = subprocess.Popen(['sleep', '5'])\n",
    "\n",
    "message_1 = \"The process is running in the background...\"\n",
    "\n",
    "Give it a couple of seconds to demonstrate the asynchronous behavior\n",
    "\n",
    "import time\n",
    "\n",
    "time.sleep(2)\n",
    "\n",
    "Check if the process has finished\n",
    "\n",
    "if process.poll() is None:\n",
    "\n",
    "\tmessage_2 = \"The process is still running.\"\n",
    "\n",
    "else:\n",
    "\n",
    "\tmessage_2 = \"The process has finished.\"\n",
    "\n",
    "print(message_1, message_2)\n",
    "\n",
    "output:\n",
    "\n",
    "('The process is running in the background...',\n",
    "\n",
    " 'The process is still running.')\n",
    "\n",
    "The process runs in the background as the script continues with other tasks (in this case, simply waiting for a couple of seconds). Then the script checks if the process is still running. In this case, the check was after 2 seconds' sleep, but Popen called sleep on 5 seconds. So the program confirms that the subprocess has not finished running. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Processing Log Files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filtering log files with regular expressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/bin/env/python3\n",
    "import sys\n",
    "logfile = sys.argv[1]\n",
    "with open(logfile) as f:\n",
    "  for line in f:\n",
    "    print(line.strip())\n",
    "\n",
    "#!/bin/env/python3\n",
    "import sys\n",
    "logfile = sys.argv[1]\n",
    "with open(logfile) as f:\n",
    "  for line in f:\n",
    "    if \"CRON\" not in line:\n",
    "      continue\n",
    "    print(line.strip())\n",
    "\n",
    "import re\n",
    "pattern = r\"USER \\((\\w+)\\)$\"\n",
    "line = \"Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)\"\n",
    "result = re.search(pattern, line)\n",
    "print(result[1])\n",
    "\n",
    "#!/bin/env/python3\n",
    "import re\n",
    "import sys\n",
    "\n",
    "logfile = sys.argv[1]\n",
    "with open(logfile) as f:\n",
    "  for line in f:\n",
    "    if \"CRON\" not in line:\n",
    "      continue\n",
    "    pattern = r\"USER \\((.+)\\)$\"\n",
    "    result = re.search(pattern, line)\n",
    "    print(result[1])\n",
    "\n",
    "chmod +x check_cron.py \n",
    "./check_cron.py syslog "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# We're using the same syslog, and we want to display the date, time, and process id that's inside the square brackets. \n",
    "# We can read each line of the syslog and pass the contents to the show_time_of_pid function. Fill in the gaps to extract the date, \n",
    "# time, and process id from the passed line, and return this format: Jul 6 14:01:23 pid:29440.\n",
    "import re\n",
    "def show_time_of_pid(line):\n",
    "  pattern = r\"(\\w+ \\d+ \\d+:\\d+:\\d+) .*?\\[(\\d+)\\]\"\n",
    "  result = re.search(pattern, line)\n",
    "  return result[1] + \" pid:\" + result[2]\n",
    "\n",
    "print(show_time_of_pid(\"Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)\")) # Jul 6 14:01:23 pid:29440\n",
    "print(show_time_of_pid(\"Jul 6 14:02:08 computer.name jam_tag=psim[29187]: (UUID:006)\")) # Jul 6 14:02:08 pid:29187\n",
    "print(show_time_of_pid(\"Jul 6 14:02:09 computer.name jam_tag=psim[29187]: (UUID:007)\")) # Jul 6 14:02:09 pid:29187\n",
    "print(show_time_of_pid(\"Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:03:01 pid:29440\n",
    "print(show_time_of_pid(\"Jul 6 14:03:40 computer.name cacheclient[29807]: start syncing from \\\"0xDEADBEEF\\\"\")) # Jul 6 14:03:40 pid:29807\n",
    "print(show_time_of_pid(\"Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:04:01 pid:29440\n",
    "print(show_time_of_pid(\"Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)\")) # Jul 6 14:05:01 pid:29440"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Making sense out of the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "usernames = {}\n",
    "name = \"good_user\"\n",
    "usernames[name] = usernames.get(name, 0) + 1\n",
    "print(usernames)\n",
    "usernames[name] = usernames.get(name, 0) + 1\n",
    "print(usernames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/bin/env/python3\n",
    "import re\n",
    "import sys\n",
    "\n",
    "logfile = sys.argv[1]\n",
    "usernames = {}\n",
    "with open(logfile) as f:\n",
    "  for line in f:\n",
    "    if \"CRON\" not in line:\n",
    "      continue\n",
    "    pattern = r\"USER \\((\\w+)\\)$\"\n",
    "    result = re.search(pattern, line)\n",
    "\n",
    "    if result is None:\n",
    "      continue\n",
    "    name = result[1]\n",
    "    usernames[name] = usernames.get(name, 0) + 1\n",
    "\n",
    "print(usernames)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Terms and definitions from course 2, module 4\n",
    "\n",
    "**Bash:** The most commonly used shell on Linux\n",
    "\n",
    "**Command line arguments:** Inputs provided to a program when running it from the command line\n",
    "\n",
    "**Environment variables:** Settings and data stored outside a program that can be accessed by it to alter how the program behaves in a particular environment\n",
    "\n",
    "**Input / Output (I/O):** These streams are the basic mechanism for performing input and output operations in your programs\n",
    "\n",
    "**Log files:** Log files are records or text files that store a history of events, actions, or errors generated by a computer system, software, or application for diagnostic, troubleshooting, or auditing purposes\n",
    "\n",
    "**Standard input stream commonly (STDIN):** A channel between a program and a source of input\n",
    "\n",
    "**Standard output stream (STDOUT):** A pathway between a program and a target of output, like a display\n",
    "\n",
    "**Standard error (STDERR):** This displays output like standard out, but is used specifically as a channel to show error messages and diagnostics from the program\n",
    "\n",
    "**Shell:** The application that reads and executes all commands \n",
    "\n",
    "**Subprocesses:** A process to call and run other applications from within Python, including other Python scripts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exemplar: Work with log files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# View log file\n",
    "Month Day hour:minute:second mycomputername \"process_name\"[\"random 5 digit number\"] \"ERROR/INFO/WARN\" \"Error description\"\n",
    "cat ~/data/fishy.log\n",
    "\n",
    "July 31 00:06:21 mycomputername kernel[96041]: WARN Failed to start network connection\n",
    "July 31 00:09:53 mycomputername updater[46711]: WARN Computer needs to be turned off and on again\n",
    "July 31 00:12:36 mycomputername kernel[48462]: INFO Successfully connected\n",
    "July 31 00:13:52 mycomputername updater[43530]: ERROR Error running Python2.exe: Segmentation Fault (core dumped)\n",
    "July 31 00:16:13 mycomputername NetworkManager[63902]: WARN Failed to start application install\n",
    "July 31 00:26:45 mycomputername CRON[83063]: INFO I'm sorry Dave. I'm afraid I can't do that\n",
    "July 31 00:27:56 mycomputername cacheclient[75746]: WARN PC Load Letter\n",
    "July 31 00:33:31 mycomputername system[25588]: ERROR Out of yellow ink, specifically, even though you want grayscale\n",
    "July 31 00:36:55 mycomputername updater[73786]: WARN Packet loss\n",
    "July 31 00:37:38 mycomputername dhcpclient[87602]: INFO Googling the answer\n",
    "July 31 00:37:48 mycomputername utility[21449]: ERROR The cake is a lie!\n",
    "July 31 00:44:50 mycomputername kernel[63793]: ERROR Failed process [13966]\n",
    "\n",
    "# Find an error\n",
    "cd ~/scripts\n",
    "nano find_error.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/env python3\n",
    "import sys\n",
    "import os\n",
    "import re\n",
    "\n",
    "\n",
    "def error_search(log_file):\n",
    "    error = input(\"What is the error?\")\n",
    "    returned_errors = []\n",
    "    with open(log_file, mode='r',encoding='UTF-8') as file:\n",
    "        for log in file.readlines():\n",
    "            error_patterns = [\"error\"]\n",
    "        for i in range(len(error.split(' '))):\n",
    "            client_loop: send disconnect: I/O errorappend(r\"{}\".format(error.split(' ')[i].lower()))\n",
    "        if all(re.search(error_pattern, log.lower()) for error_pattern in error_patterns$\n",
    "            returned_errors.append(log)\n",
    "    file.close()\n",
    "    return returned_errors\n",
    "\n",
    "\n",
    "def file_output(returned_errors):\n",
    "    with open(os.path.expanduser('~') + '/data/errors_found.log', 'w') as file:\n",
    "        for error in returned_errors:\n",
    "            file.write(error)\n",
    "        file.close()\n",
    "if __name__ == \"__main__\":\n",
    "    log_file = sys.argv[1]\n",
    "    returned_errors = error_search(log_file)\n",
    "    file_output(returned_errors)\n",
    "    sys.exit(0)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}