#!/usr/bin/python
"""Parse a W3SVC log file and group its records by IP address and user agent."""

import re
import sys

# Value that marks an empty field in a log record.
emptyField = '-'

# Ordinals in the array from each log's entry
ipCol = 9
userAgentCol = 10
urlReqCol = 5
urlParamsCol = 6
dateCol = 0
timeCol = 1


class Parser:
    """Read a log file and collect the records that match a regular expression.

    After parseLogFile() has run:

    * parsedRecords maps ip -> user agent -> list of W3SVCLogEntry objects.
    * lastError is '' on success, otherwise a description of the failure.
    """

    def __init__(self, fileName):
        """Remember the path of the log file; nothing is read yet."""
        self.fileName = fileName
        # Make lastError/parsedRecords available even before the first parse.
        self.cleanStuff()

    def parseLogFile(self, pattern):
        """Fill parsedRecords with every line of the file matching pattern.

        pattern is a regular expression, matched case-insensitively with
        re.search against each raw line. It may be a plain string or any
        object that str() can convert (the GUI passes the text of a line
        edit). An empty pattern, or None, selects every line that does not
        start with '#'.

        Errors are never raised to the caller; they are reported through
        lastError. Records collected before a failure stay in parsedRecords.
        """
        self.cleanStuff()
        try:
            # Convert first so that emptiness is tested the same way for
            # plain strings and for string-like GUI objects.
            patternText = '' if pattern is None else str(pattern)
            if not patternText:
                # let's catch every line that doesn't start with #
                patternText = '^[^#]'

            # The context manager closes the file even if reading fails.
            with open(self.fileName) as logFile:
                lines = logFile.readlines()

            matcher = re.compile(patternText, re.I)
            for line in lines:
                if not matcher.search(line):
                    continue
                tokens = line.split(' ')
                ip = tokens[ipCol]
                userAgent = tokens[userAgentCol]
                urlReq = tokens[urlReqCol]
                if tokens[urlParamsCol] != emptyField:
                    urlReq += '?' + tokens[urlParamsCol]
                dateTime = tokens[dateCol] + ' ' + tokens[timeCol]
                logEntry = W3SVCLogEntry(ip, userAgent, urlReq, dateTime)
                byUserAgent = self.parsedRecords.setdefault(ip, {})
                byUserAgent.setdefault(userAgent, []).append(logEntry)
        except IOError as exc:
            self.lastError = 'IOError: ' + str(exc)
        except IndexError as exc:
            # A matching line had fewer fields than the column ordinals need.
            self.lastError = 'IndexError: ' + str(exc)
        except Exception as exc:
            # Anything else (e.g. an invalid regular expression) is reported
            # through lastError too, so the caller never has to catch.
            self.lastError = 'Unexpected error: ' + str(exc)
            print("Unexpected error: %s" % (type(exc),))

    def cleanStuff(self):
        """Reset the results and the error message of a previous parse."""
        self.parsedRecords = {}
        self.lastError = ''


class W3SVCLogEntry:
    """The chunks of one log record that the application displays."""

    def __init__(self, ip, userAgent, urlReq, dateTime):
        self.ip = ip
        self.userAgent = userAgent
        # Requested URL, followed by '?' and the parameters when there are any.
        self.urlReq = urlReq
        # Date and time fields of the record joined by a single space.
        self.dateTime = dateTime
What we are doing here is using a class (W3SVCLogEntry) to encapsulate the pieces of data we are interested in: the IP address, the user agent, the requested URL, and the date and time at which the request was made.
The Parser class holds a dictionary as member data, which will have one key for each distinct IP address found.
In turn, each key points to a value that is another dictionary, whose keys are the user agents.
Finally, each of these keys points to a list of W3SVCLogEntry objects, initialized from the relevant values of each record associated with the user agent/IP address in question.
@pyqtSignature("")
def on_actionOpen_Log_File_triggered(self):
    """
    Ask the user for a W3SVC log file and show the selection in lblPath.

    The dialog only offers files matching *.log. Whatever the dialog
    returns is written to the label as is.
    """
    caption = self.trUtf8("Select a W3SVC log file to parse")
    nameFilter = self.trUtf8("*.log")
    chosenPath = QFileDialog.getOpenFileName(
        None, caption, QString(), nameFilter, None)
    self.lblPath.setText(chosenPath)
@pyqtSignature("")
def on_btnParse_clicked(self):
    """
    Parse the file named in lblPath and show the result in tvResult.

    The text of txtReToMatch is handed to the parser as the pattern to
    match. When the parser reports an error, the message replaces the text
    of lblPath and the tree stays empty. Otherwise the tree gets three
    levels: ip address, user agent, and one row per log entry with the
    requested url in column 0 and the date/time in column 1.
    """
    self.tvResult.clear()
    logPath = self.lblPath.text()
    regexp = self.txtReToMatch.displayText()
    parser = Parser.Parser(logPath)
    parser.parseLogFile(regexp)

    # Guard clause: on failure only the error message is shown.
    if len(parser.lastError) > 0:
        self.lblPath.setText(parser.lastError)
        return

    for ip, agents in parser.parsedRecords.items():
        ipItem = QTreeWidgetItem(self.tvResult)
        ipItem.setText(0, ip)
        self.tvResult.addTopLevelItem(ipItem)
        for agent, entries in agents.items():
            agentItem = QTreeWidgetItem(ipItem)
            agentItem.setText(0, agent)
            ipItem.addChild(agentItem)
            for entry in entries:
                entryItem = QTreeWidgetItem(agentItem)
                entryItem.setText(0, entry.urlReq)
                entryItem.setText(1, entry.dateTime)
                agentItem.addChild(entryItem)
As you can see, this method populates the tvResult widget with the results of the Parser.parseLogFile invocation. parseLogFile is called with a regexp (which defaults to one that matches every non-comment line if nothing is entered).
Populating the tree takes three nested loops: the first goes through the elements of the IP address dictionary, the second through the elements of each user agent dictionary, and the third through the list held by the second-level dictionary.
In order to finish the user interface, we could write some code to show the mandatory About... dialog as follows:
@pyqtSignature("")
def on_action_About_triggered(self):
    """
    Show the About... message box of the application.
    """
    title = "About"
    message = "IIS log files parser"
    QMessageBox.information(self, title, message)
@pyqtSignature("")
def on_action_Quit_triggered(self):
    """
    Close the main window when the Quit action is triggered.
    """
    self.close()
For an overall perspective, take a look at the whole MainWindow class implementation, included in the source code that accompanies this tutorial.