Source code for terminal_tracker.preprocess

import pandas as pd
import datetime
import pytz


class Preprocessing:
    """Parse a shell history file into a pandas DataFrame.

    Attributes:
        file (str): path to the history file
        timeframe (bool): whether time values are present in the history file
        shell (str): "zsh" or "bash" (only consulted when ``timeframe`` is true)
        df (pandas.DataFrame):
            Columns: Command (str), Main Command (str), Arguments (str),
            Tags (str)
            Optional Columns: Time (str), Pretty Time (datetime.datetime)
    """

    def __init__(self, file, timeframe=False, shell="zsh"):
        """Store the settings and eagerly parse ``file`` into ``self.df``."""
        self.file = file
        self.timeframe = timeframe
        self.shell = shell
        self.df = self._convert()

    def _convert(self):
        """Dispatch to the parser matching ``self.timeframe``."""
        if self.timeframe:
            return self._convert_timeframe()
        return self._convert_no_timeframe()

    @staticmethod
    def _split_command(command):
        """Split a command line into ``(main command, arguments, tags)``.

        The main command is everything before the first space. Within the
        remainder, the first ``#`` starts the tags; everything before it is
        the arguments. A ``#`` inside quotes is not treated specially.
        """
        main, _, rest = command.replace("\n", "").partition(" ")
        arguments, marker, tags = rest.partition("#")
        if marker:
            # Drop the whitespace separating the arguments from the tag.
            # Slicing to ``index - 1`` instead would wrap around to -1 when
            # "#" is the first character and leak the tag into the arguments.
            arguments = arguments.rstrip()
        return main, arguments, tags

    @staticmethod
    def _parse_timestamp(value):
        """Return ``value`` (Unix seconds) as an aware UTC datetime, or None.

        None is returned when ``value`` is not an integer or is outside the
        range the platform can convert.
        """
        try:
            return datetime.datetime.fromtimestamp(
                int(value), tz=datetime.timezone.utc
            )
        except (ValueError, OverflowError, OSError):
            return None

    def _convert_no_timeframe(self):
        """Parse a history file that has one plain command per line.

        Returns:
            pandas.DataFrame: columns Command, Main Command, Arguments, Tags.
        """
        data = []
        # errors="replace": keep parsing even if the file holds bytes that
        # are not valid in the default text encoding.
        with open(self.file, "r", errors="replace") as history:
            for line in history:
                command = line.replace("\n", "")
                main, arguments, tags = self._split_command(command)
                data.append([command, main, arguments, tags])
        columns = ["Command", "Main Command", "Arguments", "Tags"]
        return pd.DataFrame(data, columns=columns)

    def _convert_timeframe(self):
        """Parse a history file that carries timestamps.

        Returns:
            pandas.DataFrame: columns Command, Time, Pretty Time,
            Main Command, Arguments, Tags.

        Raises:
            ValueError: if ``self.shell`` is neither "zsh" nor "bash".
        """
        if self.shell == "zsh":
            data = self._convert_timeframe_zsh()
        elif self.shell == "bash":
            data = self._convert_timeframe_bash()
        else:
            raise ValueError(
                'Unsupported shell {!r}; expected "zsh" or "bash"'.format(
                    self.shell
                )
            )
        columns = [
            "Command",
            "Time",
            "Pretty Time",
            "Main Command",
            "Arguments",
            "Tags",
        ]
        return pd.DataFrame(data, columns=columns)

    def _convert_timeframe_zsh(self):
        """Parse zsh history lines shaped like ``: <time>:<elapsed>;<command>``.

        Lines without a ``;`` or without an integer Unix timestamp in the
        header are reported on stdout and skipped. That includes the
        continuation lines of multi-line commands, so such commands keep
        only their first line.

        Returns:
            list[list]: rows of [command, time, pretty_time, main command,
            arguments, tags].
        """
        data = []
        with open(self.file, "r", errors="replace") as history:
            for line in history:
                # Split on the FIRST ";" only, so commands that themselves
                # contain ";" (e.g. "cd x; ls") are kept intact.
                header, separator, command = line.partition(";")
                pretty_time = None
                if separator:
                    # Skip the leading ": " and keep the part before the
                    # elapsed-seconds field.
                    time = header[2:].split(":")[0]
                    pretty_time = self._parse_timestamp(time)
                if pretty_time is None:
                    print("Ignoring:" + str(line))
                    continue
                command = command.replace("\n", "")
                main, arguments, tags = self._split_command(command)
                data.append(
                    [command, time, pretty_time, main, arguments, tags]
                )
        return data

    def _convert_timeframe_bash(self):
        """Parse bash history where ``#<time>`` lines precede commands.

        A line counts as a timestamp only when the text after ``#`` is an
        integer; any other line (including ``#comment`` commands) is a
        command. Commands with no preceding timestamp get ``"No"`` in both
        time columns.

        Returns:
            list[list]: rows of [command, time, pretty_time, main command,
            arguments, tags].
        """
        data = []
        pending = None  # (time, pretty_time) waiting for its command line
        with open(self.file, "r", errors="replace") as history:
            for line in history:
                command = line.replace("\n", "")
                if command.startswith("#"):
                    stamp = command[1:]
                    parsed = self._parse_timestamp(stamp)
                    if parsed is not None:
                        pending = (stamp, parsed)
                        continue
                if pending is None:
                    time, pretty_time = "No", "No"
                else:
                    time, pretty_time = pending
                    pending = None  # a timestamp applies to one command
                main, arguments, tags = self._split_command(command)
                data.append(
                    [command, time, pretty_time, main, arguments, tags]
                )
        return data