{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Final Project"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Log analysis using regular expressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shell commands need the IPython `!` prefix inside a Python code cell\n",
    "!grep \"ERROR\" syslog.log\n",
    "!grep \"ERROR Tried to add information to closed ticket\" syslog.log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "# Only the last expression of a cell is displayed, so print each captured message\n",
    "line = \"May 27 11:45:40 ubuntu.local ticky: INFO: Created ticket [#1234] (username)\"\n",
    "print(re.search(r\"ticky: INFO: ([\\w ]*) \", line).group(1))\n",
    "# Created ticket\n",
    "\n",
    "line = \"May 27 11:45:40 ubuntu.local ticky: ERROR Error creating ticket [#1234] (username)\"\n",
    "print(re.search(r\"ticky: ERROR ([\\w ]*) \", line).group(1))\n",
    "# Error creating ticket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import operator\n",
    "\n",
    "fruit = {\"oranges\": 3, \"apples\": 5, \"bananas\": 7, \"pears\": 2}\n",
    "\n",
    "# Default sort: by key\n",
    "print(sorted(fruit.items()))\n",
    "# [('apples', 5), ('bananas', 7), ('oranges', 3), ('pears', 2)]\n",
    "\n",
    "# Explicit sort by key (tuple element 0)\n",
    "print(sorted(fruit.items(), key=operator.itemgetter(0)))\n",
    "# [('apples', 5), ('bananas', 7), ('oranges', 3), ('pears', 2)]\n",
    "\n",
    "# Sort by value (tuple element 1)\n",
    "print(sorted(fruit.items(), key=operator.itemgetter(1)))\n",
    "# [('pears', 2), ('oranges', 3), ('apples', 5), ('bananas', 7)]\n",
    "\n",
    "# Sort by value, largest first\n",
    "print(sorted(fruit.items(), key=operator.itemgetter(1), reverse=True))\n",
    "# [('bananas', 7), ('apples', 5), ('oranges', 3), ('pears', 2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/env python3\n",
    "import sys\n",
    "import re\n",
    "import operator\n",
    "import csv\n",
    "\n",
    "# * Sample line of the log file\n",
    "# \"May 27 11:45:40 ubuntu.local ticky: INFO: Created ticket [#1234] (username)\"\n",
    "# Groups: 1 = level (INFO / ERROR), 2 = message, 3 = username.\n",
    "# The ticket number \"[#1234]\" is optional and is not part of the message.\n",
    "LOG_PATTERN = re.compile(\n",
    "    r\"ticky: (\\w+):? ([\\w' ]*?) ?(?:\\[#\\d+\\] ?)?\\((.*)\\)$\")\n",
    "\n",
    "# Dict: number of INFO and ERROR entries for each user\n",
    "per_user = {}\n",
    "# Dict: number of occurrences of each ERROR message\n",
    "errors = {}\n",
    "\n",
    "# * Read file and create dictionaries\n",
    "with open('syslog.log') as log_file:\n",
    "    # Iterate lazily instead of loading every line with readlines()\n",
    "    for line in log_file:\n",
    "        match = LOG_PATTERN.search(line)\n",
    "        if match is None:\n",
    "            # Not a ticky entry (or malformed): skip it instead of crashing\n",
    "            continue\n",
    "        code, error_msg, user = match.groups()\n",
    "        error_msg = error_msg.strip()\n",
    "\n",
    "        # Every user seen in the log gets a row, starting at zero counts\n",
    "        counts = per_user.setdefault(user, {'INFO': 0, 'ERROR': 0})\n",
    "        if code in counts:\n",
    "            counts[code] += 1\n",
    "\n",
    "        # Only ERROR entries belong in the error-message ranking\n",
    "        if code == 'ERROR':\n",
    "            errors[error_msg] = errors.get(error_msg, 0) + 1\n",
    "\n",
    "# Sorted by VALUE (most common to least common)\n",
    "errors_list = sorted(errors.items(), key=operator.itemgetter(1), reverse=True)\n",
    "# Sorted by USERNAME\n",
    "per_user_list = sorted(per_user.items(), key=operator.itemgetter(0))\n",
    "\n",
    "# Insert the header row at the beginning of the list\n",
    "errors_list.insert(0, ('Error', 'Count'))\n",
    "\n",
    "# * Create CSV file user_statistics\n",
    "# lineterminator='\\n' keeps the plain newline row endings of the report files\n",
    "with open('user_statistics.csv', 'w', newline='') as user_csv:\n",
    "    user_writer = csv.writer(user_csv, lineterminator='\\n')\n",
    "    user_writer.writerow(['Username', 'INFO', 'ERROR'])\n",
    "    for user, counts in per_user_list:\n",
    "        user_writer.writerow([user, counts['INFO'], counts['ERROR']])\n",
    "\n",
    "# * Create CSV file error_message\n",
    "with open('error_message.csv', 'w', newline='') as error_csv:\n",
    "    # One \"message,count\" row per entry; csv.writer handles the separators\n",
    "    csv.writer(error_csv, lineterminator='\\n').writerows(errors_list)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}