使用Python将Apache日志转换为CSV

安装 apache_log_parser

pip install apache_log_parser

创建以下代码

几乎如下所示,部分修复了bug。
https://github.com/isonet/apache-log-to-csv

#!/usr/bin/env python
# coding: utf-8

import csv
import apache_log_parser
import argparse

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'


def main(**kwargs):

    print('Converting, please wait...')

    line_parser = apache_log_parser.make_parser(kwargs['format'])
    header = True

    with open(kwargs['input'], 'rb') as inFile, open(kwargs['output'], 'w') as outFile:

        lines = inFile.readlines()
        writer = csv.writer(outFile, delimiter=',')

        for line in lines:
            try:
                log_line_data = line_parser(line)
            except apache_log_parser.LineDoesntMatchException as ex:
                print(bcolors.FAIL + 'The format specified does not match the log file. Aborting...' + bcolors.ENDC)
                print('Line: ' + ex.log_line + 'RegEx: ' + ex.regex)
                exit()

            if header:
                writer.writerow(list(log_line_data.keys()))
                header = False
            else:
                writer.writerow(list(log_line_data.values()))

    print(bcolors.OKGREEN + 'Conversion finished.' + bcolors.ENDC)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert Apache logs to csv', version='%(prog)s 1.0')
    parser.add_argument('format', type=str, help='Apache log format (see http://httpd.apache.org/docs/2.2/logs.html)')
    parser.add_argument('input', type=str, help='Input log file ex. /var/log/apache/access.log')
    parser.add_argument('output', type=str, help='Output csv file ex. ~/accesslog.csv')
    args = parser.parse_args()
    main(**vars(args))

执行

Note: The word “実行する” can be translated into Chinese as “执行”, which means “to execute” or “to carry out”.

将alog2csv.py按照格式从源文件输出到目标文件。

python alog2csv.py "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" access_log_20200430 output.csv
广告
将在 10 秒后关闭
bannerAds