# Reading data interactively

In [None]:
cat hello.py
#!/usr/bin/env python3

name = input("Please enter your name: ")
print("Hello, " + name)

# ./hello.py 
# Please enter your name: Roger
# Output will be: Hello, Roger

In [None]:
# defining a function called 'to_seconds' which takes hours, minutes and seconds as
# inputs and returns their total equivalent in seconds by multiplying each with its
# corresponding factor (3600 for hours, 60 for minutes) and then summing them up.
def to_seconds(hours, minutes, seconds):
    """Return the total number of seconds in the given hours, minutes and seconds."""
    from_hours = hours * 3600
    from_minutes = minutes * 60
    return from_hours + from_minutes + seconds

# Greet the user once, before the first conversion.
print("Welcome to this time converter")

# Always run at least one conversion, then repeat for as long as the user
# answers "y" (in either case) to the follow-up question.
while True:
    hours = int(input("Enter the number of hours: "))
    minutes = int(input("Enter the number of minutes: "))
    seconds = int(input("Enter the number of seconds: "))

    print("That's {} seconds".format(to_seconds(hours, minutes, seconds)))
    print()

    cont = input("Do you want to do another conversion? [y to continue] ")
    if cont.lower() != "y":
        break

# The user chose to stop.
print("Goodbye!")


# Standard streams
I/O Streams

In [None]:
cat streams.py
#!/usr/bin/env python3

data = input("This will come from STDIN: ")
print("Now we write it to STDOUT: " + data)
print("Now we generate an error to STDERR: " + data + 1)

./streams.py 
This will come from STDIN: Python Rocks!
Now we write it to STDOUT: Python Rocks!

cat greeting.txt 
Well hello there, STDOUT

cat greeting.txt 
Well hello there, STDOUT

ls -z

## Environment variables

In [3]:
# echo $PATH
# /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# cat variables.py
#!/usr/bin/env python3
import os
print("HOME: " + os.environ.get("HOME", ""))
print("SHELL: " + os.environ.get("SHELL", ""))
print("FRUIT: " + os.environ.get("FRUIT", ""))
# ./variables.py
# export FRUIT=Pineapple
# ./variables.py

HOME: /config
SHELL: 
FRUIT: 


## Command-Line Arguments and Exit Status

In [None]:
cat parameters.py 
#!/usr/bin/env python3
import sys
print(sys.argv)

./parameters.py
['./parameters.py'] 

./parameters.py one two three
['./parameters.py', 'one', 'two', 'three']


wc variables.py
7 19 174 variables.py 	
echo $?
0

wc notpresent.sh
wc: notpresent.sh: No such file or directory
echo $?
1

#!/usr/bin/env python3

import os
import sys

# The file to create is the first command-line argument.
filename = sys.argv[1]

# Mode "x" (exclusive creation) creates the file and fails if it already
# exists, all in one step. Checking os.path.exists() first and then opening
# with "w" leaves a window in which another process can create the file,
# which "w" would then silently truncate.
try:
    with open(filename, "x") as f:
        f.write("New file created\n")
except FileExistsError:
    print("Error, the file {} already exists!".format(filename))
    # A non-zero exit status tells the calling shell that the script failed.
    sys.exit(1)

./create_file.py example
echo $?
0

cat example 
New file created
./create_file.py example
Error, the file example already exists!
echo $?
1

Python 2 and Python 3 handle input and raw_input differently.

In Python 2

- input(x) is roughly the same as eval(raw_input(x))
- raw_input() is preferred, unless the author wants to support evaluating string expressions.
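- eval() is used to evaluate string expressions.

In Python 3

- input() always returns the typed text as a string (it behaves like Python 2's raw_input()).
- To evaluate that text as an expression you must call eval() yourself, as the two sessions below show. Never call eval() on input you do not trust.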

In [None]:
>>> my_number = input('Please Enter a Number: \n')
Please Enter a Number: 
123 + 1
>>> print(my_number)
123 + 1
>>> type(my_number)
<class 'str'>


>>> my_number = input('Please Enter a Number: \n')
Please Enter a Number: 
123 + 1
>>> print(my_number)
123 + 1
>>> eval(my_number)
124

# Subprocesses

## Running system commands in Python

In [None]:
import subprocess
subprocess.run(["date"])
subprocess.run(["sleep", "2"])
result = subprocess.run(["ls", "this_file_does_not_exist"])
print(result.returncode)

## Obtaining the output of a system command

In [None]:
result = subprocess.run(["host", "8.8.8.8"], capture_output=True)

result = subprocess.run(["host", "8.8.8.8"], capture_output=True)
print(result.returncode)

result = subprocess.run(["host", "8.8.8.8"], capture_output=True)
print(result.stdout)

result = subprocess.run(["host", "8.8.8.8"], capture_output=True)
print(result.stdout.decode().split())

In [None]:
import subprocess
result = subprocess.run(["rm", "does_not_exist"], capture_output=True)

import subprocess
result = subprocess.run(["rm", "does_not_exist"], capture_output=True)
print(result.returncode)

import subprocess
result = subprocess.run(["rm", "does_not_exist"], capture_output=True)
print(result.returncode)
print(result.stdout)
print(result.stderr)

## Advanced subprocess management

In [None]:
import os
import subprocess

my_env = os.environ.copy()
my_env["PATH"] = os.pathsep.join(["/opt/myapp/", my_env["PATH"]])

result = subprocess.run(["myapp"], env=my_env)

**Python subprocesses**

In Python, there are usually a lot of different ways to accomplish the same task. Some are easier to write, some are better suited to a given task, and some have a lower overhead in terms of the amount of computing power used. Subprocesses are a way to call and run other applications from within Python, including other Python scripts. In Python, the subprocess module can run new codes and applications by launching the new processes from the Python program. Because subprocess allows you to spawn new processes, it is a very useful way to run multiple processes in parallel instead of sequentially.

Python subprocess can launch processes to: 
	Open multiple data files in a folder simultaneously. 
	Run external programs. 
	Connect to input, output, and error pipes and get return codes.

Comparing subprocess to OS and Pathlib
Again, Python has multiple ways to achieve most tasks; subprocess is extremely powerful, as it allows you to do anything you would from Python in the shell and get information back into Python. But just because you can use subprocess doesn’t always mean you'll want to. 

Let’s compare subprocess to two of its alternatives: OS, which has been covered in other readings, and Pathlib. For tasks like getting the current working directory or creating a directory, OS and Pathlib are more direct (or “Pythonic,” meaning they use the language as it was intended). Using subprocess for tasks like these is like using a sledgehammer to crack a nut. It's more heavy-duty and can be overkill for simple operations. 

As a comparison example, the following commands accomplish the exact same tasks of getting the current working directory. 

Subprocess: 

cwd_subprocess = subprocess.check_output(['pwd'], text=True).strip()

OS: 

cwd_os = os.getcwd()

Pathlib: 

cwd_pathlib = Path.cwd()

And these following commands accomplish the exact same tasks of creating a directory. 

Subprocess: 

subprocess.run(['mkdir', 'test_dir_subprocess2'])

OS: 

os.mkdir('test_dir_os2')

Pathlib: 

test_dir_pathlib2 = Path('test_dir_pathlib2')

test_dir_pathlib2.mkdir(exist_ok=True) #Ensures the directory is created only if it doesn't already exist

**When to use subprocess**
Subprocess is best used when you need to interface with external processes, run complex shell commands, or need precise control over input and output. Subprocess also spawns fewer processes per task than OS, so subprocess can use less compute power. 

**Other advantages include:**
	Subprocess can run any shell command, providing greater flexibility.
	Subprocess can capture stdout and stderr easily.

On the other hand, OS is useful for basic file and directory operations, environment variable management, and when you don't need the object-oriented approach provided by Pathlib. 

**Other advantages include:**
	OS provides a simple way to interface with the operating system for basic operations.
	OS is part of the standard library, so it's widely available.

Finally, Pathlib is most helpful for working extensively with file paths, when you want an object-oriented and intuitive way to handle file system tasks, or when you're working on code where readability and maintainability are crucial. 

**Other advantages include:**
	Pathlib provides an object-oriented approach to handle file system paths.
	Compared to OS, Pathlib is more intuitive for file and directory operations. 
	Pathlib is more readable for path manipulations.

**Where subprocess shines**
The basic ways of using subprocess are the .run() and .Popen() methods. There are additional methods, .call(), .check_output(), and .check_call(). Usually, you will just want to use .run() or one of the two check methods when appropriate. However, when spawning parallel processes or communicating between subprocesses, .Popen() has a lot more power!

You can think of .run() as the simplest way to run a command—it’s all right there in the name—and .Popen() as the most fully featured way to call external commands. 
All of the methods, .run(), .call(),  .check_output(), and .check_call() are wrappers around the .Popen() class. 

Run
The .run() command is the recommended approach to invoking subprocesses. It runs the command, waits for it to complete, then returns a CompletedProcess instance that contains information about the process.

Using .run() to execute the echo command:

result_run = subprocess.run(['echo', 'Hello, World!'], capture_output=True, text=True)

result_run.stdout.strip()  # Extracting the stdout and stripping any extra whitespace

output:

'Hello, World!'

Call 
The call() command runs a command, waits for it to complete, then returns the return code. Call is older and .run() should be used now, but it’s good to see how it works.

Using call() to execute the echo command: 

return_code_call = subprocess.call(['echo', 'Hello from call!'])

return_code_call

output:

0

The returned value 0 indicates that the command was executed successfully.

Check_call and check_output
Use check_call() to receive just the status of a command. Use check_output() to also obtain output. These are good for situations such as file IO, where a file might not exist, or the operation may otherwise fail. 

The command check_call() is similar to call() but raises a CalledProcessError exception if the command returns a non-zero exit code.

Using check_call() to execute the echo command:

return_code_check_call = subprocess.check_call(['echo', 'Hello from check_call!'])

return_code_check_call

output:

0

The returned value 0 indicates that the command was executed successfully.

Using check_output() to execute the echo command:

output_check_output = subprocess.check_output(['echo', 'Hello from check_output!'], text=True)

output_check_output.strip()  # Extracting the stdout and stripping any extra whitespace

output:

'Hello from check_output!'

Note: check_output() raises a CalledProcessError if the command returns a non-zero exit code. For more on CalledProcessError, see the "Exceptions" section of the Python subprocess documentation.

**Popen**
Popen() offers more advanced features compared to the previously mentioned functions. It allows you to spawn a new process, connect to its input/output/error pipes, and obtain its return code.

Using Popen to execute the echo command:

process_popen = subprocess.Popen(['echo', 'Hello from popen!'], stdout=subprocess.PIPE, text=True)

output_popen, _ = process_popen.communicate()

output_popen.strip()  # Extracting the stdout and stripping any extra whitespace

output:

'Hello from popen!'

**Pro tip**
The Popen command is very useful when you need asynchronous behavior and the ability to pipe information between a subprocess and the Python program that ran that subprocess. Imagine you want to start a long-running command in the background and then continue with other tasks in your script. Later on, you want to be able to check if the process has finished. Here’s how you would do that using Popen.

import subprocess

Using Popen for asynchronous behavior: 

process = subprocess.Popen(['sleep', '5'])

message_1 = "The process is running in the background..."

Give it a couple of seconds to demonstrate the asynchronous behavior

import time

time.sleep(2)

Check if the process has finished

if process.poll() is None:

	message_2 = "The process is still running."

else:

	message_2 = "The process has finished."

print(message_1, message_2)

output:

The process is running in the background... The process is still running.

The process runs in the background as the script continues with other tasks (in this case, simply waiting for a couple of seconds). Then the script checks if the process is still running. In this case, the check happened after a 2-second sleep, but Popen started a 5-second sleep, so the program confirms that the subprocess has not finished running. 

# Processing Log Files

## Filtering log files with regular expressions

In [None]:
#!/usr/bin/env python3
import sys
logfile = sys.argv[1]
with open(logfile) as f:
  for line in f:
    print(line.strip())

#!/usr/bin/env python3
import sys
logfile = sys.argv[1]
with open(logfile) as f:
  for line in f:
    if "CRON" not in line:
      continue
    print(line.strip())

import re
pattern = r"USER \((\w+)\)$"
line = "Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)"
result = re.search(pattern, line)
print(result[1])

#!/usr/bin/env python3
import re
import sys

# The log file to scan is the first command-line argument.
logfile = sys.argv[1]
# The pattern is the same for every line, so define it once up front.
pattern = r"USER \((.+)\)$"
with open(logfile) as f:
  for line in f:
    # Only CRON entries are of interest.
    if "CRON" not in line:
      continue
    result = re.search(pattern, line)
    # A CRON line does not have to end in "USER (...)". re.search() returns
    # None for those, and indexing None would raise TypeError, so skip them.
    if result is None:
      continue
    print(result[1])

chmod +x check_cron.py 
./check_cron.py syslog 

In [None]:
# We're using the same syslog, and we want to display the date, time, and process id that's inside the square brackets. 
# We can read each line of the syslog and pass the contents to the show_time_of_pid function. Fill in the gaps to extract the date, 
# time, and process id from the passed line, and return this format: Jul 6 14:01:23 pid:29440.
import re
def show_time_of_pid(line):
  """Return the timestamp and process id found in a syslog line.

  The result has the form "<timestamp> pid:<pid>", for example
  "Jul 6 14:01:23 pid:29440". The process id is the number inside the
  first pair of square brackets that follows the timestamp.

  Raises TypeError when the line does not contain both parts, because
  re.search() then returns None.
  """
  found = re.search(r"(\w+ \d+ \d+:\d+:\d+) .*?\[(\d+)\]", line)
  timestamp, pid = found[1], found[2]
  return "{} pid:{}".format(timestamp, pid)

print(show_time_of_pid("Jul 6 14:01:23 computer.name CRON[29440]: USER (good_user)")) # Jul 6 14:01:23 pid:29440
print(show_time_of_pid("Jul 6 14:02:08 computer.name jam_tag=psim[29187]: (UUID:006)")) # Jul 6 14:02:08 pid:29187
print(show_time_of_pid("Jul 6 14:02:09 computer.name jam_tag=psim[29187]: (UUID:007)")) # Jul 6 14:02:09 pid:29187
print(show_time_of_pid("Jul 6 14:03:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:03:01 pid:29440
print(show_time_of_pid("Jul 6 14:03:40 computer.name cacheclient[29807]: start syncing from \"0xDEADBEEF\"")) # Jul 6 14:03:40 pid:29807
print(show_time_of_pid("Jul 6 14:04:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:04:01 pid:29440
print(show_time_of_pid("Jul 6 14:05:01 computer.name CRON[29440]: USER (naughty_user)")) # Jul 6 14:05:01 pid:29440

## Making sense out of the data

In [None]:
usernames = {}
name = "good_user"
usernames[name] = usernames.get(name, 0) + 1
print(usernames)
usernames[name] = usernames.get(name, 0) + 1
print(usernames)

In [None]:
#!/usr/bin/env python3
import re
import sys

# The log file to scan is the first command-line argument.
logfile = sys.argv[1]
# Maps each user name to the number of matching CRON lines seen for it.
usernames = {}
pattern = r"USER \((\w+)\)$"
with open(logfile) as f:
  for line in f:
    # Only CRON lines are searched; everything else counts as "no match".
    result = re.search(pattern, line) if "CRON" in line else None
    if result is not None:
      name = result[1]
      usernames[name] = usernames.get(name, 0) + 1

print(usernames)

Terms and definitions from course 2, module 4

**Bash:** The most commonly used shell on Linux

**Command line arguments:** Inputs provided to a program when running it from the command line

**Environment variables:** Settings and data stored outside a program that can be accessed by it to alter how the program behaves in a particular environment

**Input / Output (I/O):** These streams are the basic mechanism for performing input and output operations in your programs

**Log files:** Log files are records or text files that store a history of events, actions, or errors generated by a computer system, software, or application for diagnostic, troubleshooting, or auditing purposes

**Standard input stream (STDIN):** A channel between a program and a source of input

**Standard output stream (STDOUT):** A pathway between a program and a target of output, like a display

**Standard error (STDERR):** This displays output like standard out, but is used specifically as a channel to show error messages and diagnostics from the program

**Shell:** The application that reads and executes all commands 

**Subprocesses:** A process to call and run other applications from within Python, including other Python scripts

## Exemplar: Work with log files

In [None]:
# View log file
Month Day hour:minute:second mycomputername "process_name"["random 5 digit number"] "ERROR/INFO/WARN" "Error description"
cat ~/data/fishy.log

July 31 00:06:21 mycomputername kernel[96041]: WARN Failed to start network connection
July 31 00:09:53 mycomputername updater[46711]: WARN Computer needs to be turned off and on again
July 31 00:12:36 mycomputername kernel[48462]: INFO Successfully connected
July 31 00:13:52 mycomputername updater[43530]: ERROR Error running Python2.exe: Segmentation Fault (core dumped)
July 31 00:16:13 mycomputername NetworkManager[63902]: WARN Failed to start application install
July 31 00:26:45 mycomputername CRON[83063]: INFO I'm sorry Dave. I'm afraid I can't do that
July 31 00:27:56 mycomputername cacheclient[75746]: WARN PC Load Letter
July 31 00:33:31 mycomputername system[25588]: ERROR Out of yellow ink, specifically, even though you want grayscale
July 31 00:36:55 mycomputername updater[73786]: WARN Packet loss
July 31 00:37:38 mycomputername dhcpclient[87602]: INFO Googling the answer
July 31 00:37:48 mycomputername utility[21449]: ERROR The cake is a lie!
July 31 00:44:50 mycomputername kernel[63793]: ERROR Failed process [13966]

# Find an error
cd ~/scripts
nano find_error.py

In [None]:
#!/usr/bin/env python3
import sys
import os
import re


def error_search(log_file):
    """Prompt for an error description and return the log lines that match it.

    The user is asked to describe the error. Each space-separated word of
    the answer is lower-cased and used as a regular expression. A line of
    the log is returned when its lower-cased text matches "error" and every
    one of those words.

    Args:
        log_file: Path of the log file to scan; it is read as UTF-8.

    Returns:
        A list of the matching lines, exactly as read from the file
        (line endings included), in file order.
    """
    error = input("What is the error?")
    # Every pattern must match for a line to be kept. "error" is always
    # required, so only lines mentioning an error are considered. The
    # patterns depend only on the user's answer, so build them once here
    # rather than once per log line.
    error_patterns = ["error"] + [word.lower() for word in error.split(' ')]

    returned_errors = []
    # The with-statement closes the file; no explicit close() is needed.
    with open(log_file, mode='r', encoding='UTF-8') as file:
        for log in file:
            lowered = log.lower()
            if all(re.search(error_pattern, lowered) for error_pattern in error_patterns):
                returned_errors.append(log)
    return returned_errors


def file_output(returned_errors):
    """Write the given log lines to ``~/data/errors_found.log``.

    An existing file at that path is overwritten. The lines are written
    as-is, so each one must already carry its own line ending.

    Args:
        returned_errors: Iterable of strings to write, in order.
    """
    output_path = os.path.join(os.path.expanduser('~'), 'data', 'errors_found.log')
    # Write UTF-8 explicitly: error_search() reads the log as UTF-8, and the
    # platform default encoding may not be able to represent every character.
    # The with-statement closes the file; no explicit close() is needed.
    with open(output_path, 'w', encoding='UTF-8') as file:
        file.writelines(returned_errors)
if __name__ == "__main__":
    # Search the log file named on the command line, then save the matches.
    matches = error_search(sys.argv[1])
    file_output(matches)
    sys.exit(0)
