とあるURLが外からアクセスされた数と、そのユーザーエージェントを知りたい
http://d.hatena.ne.jp/yfp/20040818#p2のやつ。HDDに埋もれると嫌だから、貼っとく。
#! /usr/bin/ruby
#
# Counts, per day and per handset model, the GET requests for /ez/ (or
# /ez/index.html) found in the given access-log files and prints the result
# as CSV: one row per day, one column per model, plus "total" and "ratio" rows.
#
# A request is counted only when
#   * its timestamp lies in [date_from, date_to),
#   * its request line matches TARGET_REQUEST,
#   * its Referer does NOT match EXCLUDED_REFERER, and
#   * its User-Agent matches EZ_USER_AGENT_FORMAT.
#
# Options:
#   -d                  report yesterday only (overrides --from / --to)
#   --from=2xxx-xx-xx   start of the range (default: first day of this month)
#   --to=2xxx-xx-xx     end of the range, exclusive (default: today)

require 'optparse'
require 'time'
require 'date'

USAGE = "usage: #{$PROGRAM_NAME} [-d] [--from=2xxx-xx-xx] [--to=2xxx-xx-xx] log_file1 log_file2...".freeze

# Pairs of [device code captured from the User-Agent, model name used as the
# CSV column header]. Lookup goes through Array#assoc, so when a code is
# listed twice (e.g. "CA23", "TS25") the first pair wins.
DEVICE = [["SA31", "W21SA"],
          ["SN31", "W21S"],
          ["KC32", "W21K"],
          ["HI32", "W21H"],
          ["KC31", "W11K"],
          ["HI31", "W11H"],
          ["ST22", "INFOBAR"],
          ["TS28", "A5506T"],
          ["SA27", "A5505SA"],
          ["TS27", "A5504T"],
          ["SA26", "A5503SA"],
          ["KC24", "A5502K"],
          ["KC25", "A5502K"],
          ["TS26", "A5501T"],
          ["CA26", "A5407CA"],
          ["CA25", "A5406CA"],
          ["ST23", "A5405SA"],
          ["SN25", "A5404S"],
          ["CA24", "A5403CA"],
          ["SN24", "A5402S"],
          ["CA23", "A5401CA II"],
          ["CA23", "A5401CA"],
          ["ST21", "A5306ST"],
          ["KC22", "A5305K"],
          ["TS24", "A5304T"],
          ["HI24", "A5303H II"],
          ["HI23", "A5303H"],
          ["CA22", "A5302CA"],
          ["TS23", "A5301T"],
          ["TS21", "C5001T"],
          ["SN26", "A1402S"],
          ["KC23", "A1401K"],
          ["SA28", "A1305SA"],
          ["TS25", "A1304T II"],
          ["TS25", "A1304T"],
          ["SA25", "A1303SA"],
          ["SA24", "A1302SA"],
          ["SN23", "A1301S"],
          ["SN22", "A1101S"],
          ["SA22", "A3015SA"],
          ["SN21", "A3014S"],
          ["TS22", "A3013T"],
          ["CA21", "A3012CA"],
          ["SA21", "A3011SA"],
          ["MA21", "C3003P"],
          ["KC21", "C3002K"],
          ["HI21", "C3001H"]].freeze

# Variant for logs that carry a fifth quoted field (subscriber id):
# LOG_FORMAT = /\A\S+ \S+ \S+ \[([^\]]+)\] "([^"]+)" \S+ \S+ "([^"]+)" "([^"]+)" "([^"]+)".*\Z/
#
# Captures: 1 = bracketed timestamp, 2 = request line, 3 = referer,
# 4 = user agent.
LOG_FORMAT = /\A\S+ \S+ \S+ \[([^\]]+)\] "([^"]+)" \S+ \S+ "([^"]+)" "([^"]+)".*\Z/

# Captures: day, month abbreviation, year, hour, minute, second.
TIME_FORMAT = %r!\A(\d\d)/([A-Z][a-z][a-z])/(\d\d\d\d):(\d\d):(\d\d):(\d\d)!

MONTH = %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec).freeze

TARGET_REQUEST = /\AGET \/ez\/(index\.html)? HTTP.*\Z/

# Capture 1 is the device code looked up in DEVICE.
EZ_USER_AGENT_FORMAT = /\AKDDI-(\S+) \S+ \(GUI\) \S+.*\Z/

# Requests whose Referer matches this pattern are left out of the count.
EXCLUDED_REFERER = %r!\Ahttp://(gwmj\.jp|(www\.)?emkn\.com).*\Z!

# One parsed access-log line.
#
# When the line does not match LOG_FORMAT (or its timestamp cannot be read)
# the object is still created, but #date is nil and #parsed? returns false,
# so one malformed line no longer aborts the whole run.
class AccessLog
  attr_accessor :date, :request, :referer, :user_agent, :subscriber_id

  # s - a single raw log line (a trailing newline is fine).
  def initialize(s)
    # subscriber_id stays nil unless the five-field LOG_FORMAT variant is used.
    _, stamp, @request, @referer, @user_agent, @subscriber_id = LOG_FORMAT.match(s).to_a
    @date = parse_time(stamp)
  end

  # True when the line matched LOG_FORMAT and its timestamp was readable.
  def parsed?
    !@date.nil?
  end

  private

  # Turns "dd/Mon/yyyy:hh:mm:ss ..." into a local Time; the zone suffix in the
  # log is ignored. Returns nil when the text is missing or not in that form.
  def parse_time(stamp)
    m = stamp && TIME_FORMAT.match(stamp)
    month = m && MONTH.index(m[2])
    return nil unless month

    Time.local(m[3].to_i, month + 1, m[1].to_i, m[4].to_i, m[5].to_i, m[6].to_i)
  rescue ArgumentError
    # Time.local rejects out-of-range fields such as hour 99.
    nil
  end
end

# Parses -d / --from / --to out of argv (destructively, leaving only the file
# names) and returns them as a Hash. Aborts with USAGE on an unknown option.
def parse_options(argv)
  options = { daily: false, from: nil, to: nil }
  parser = OptionParser.new do |opts|
    opts.banner = USAGE
    opts.on('-d') { options[:daily] = true }
    opts.on('--from=DATE') { |value| options[:from] = value }
    opts.on('--to=DATE') { |value| options[:to] = value }
  end
  parser.parse!(argv)
  options
rescue OptionParser::ParseError
  abort USAGE
end

# Returns [date_from, date_to] as Time objects for the given options.
# Aborts with USAGE when --from / --to cannot be parsed as a time.
def date_range(options, today = Date.today)
  midnight = ->(date) { Time.local(date.year, date.month, date.day) }
  return [midnight.call(today - 1), midnight.call(today)] if options[:daily]

  month_start = Date.new(today.year, today.month, 1)
  from = options[:from] ? Time.parse(options[:from]) : midnight.call(month_start)
  to = options[:to] ? Time.parse(options[:to]) : midnight.call(today)
  [from, to]
rescue ArgumentError
  abort USAGE
end

# Calendar day (Date) on which the given Time falls.
def day_of(time)
  Date.new(time.year, time.month, time.day)
end

# Column header for a device code; codes missing from DEVICE are reported
# under the raw code instead of crashing the run.
def device_name(code)
  pair = DEVICE.assoc(code)
  pair ? pair[1] : code
end

# True when the log entry passes the date, request and referer filters.
def countable?(log, date_from, date_to)
  return false unless log.parsed?
  return false if EXCLUDED_REFERER.match?(log.referer)
  return false unless TARGET_REQUEST.match?(log.request)

  log.date >= date_from && log.date < date_to
end

# Reads every file line by line and returns { model name => { Date => hits } }.
# Counting happens while streaming, so the logs are never held in memory.
def tally_hits(filenames, date_from, date_to)
  counts = Hash.new { |by_device, device| by_device[device] = Hash.new(0) }
  filenames.each do |filename|
    # Read as raw bytes: referers and user agents can contain arbitrary bytes,
    # and the patterns above are plain ASCII so they match binary strings.
    File.foreach(filename, encoding: Encoding::BINARY) do |line|
      log = AccessLog.new(line)
      next unless countable?(log, date_from, date_to)

      code = EZ_USER_AGENT_FORMAT.match(log.user_agent).to_a[1]
      next unless code

      # Keyed by Date rather than "month/day" so that a range longer than a
      # year does not merge the same calendar day of different years.
      counts[device_name(code)][day_of(log.date)] += 1
    end
  end
  counts
end

# Writes the CSV report: header, one row per day, then "total" and "ratio".
def print_report(counts, date_from, date_to, out = $stdout)
  devices = counts.keys.sort
  first_day = day_of(date_from)
  # date_to is exclusive. Stepping back one second yields the last day that
  # can contain a counted hit: the previous day when date_to is midnight, the
  # same day when date_to carries a time of day.
  last_day = day_of(date_to - 1)
  device_totals = Hash.new(0)
  grand_total = 0

  out.puts(['', *devices, 'total'].join(','))

  first_day.upto(last_day) do |day|
    hits = devices.map { |device| counts[device][day] }
    devices.zip(hits) { |device, n| device_totals[device] += n }
    day_total = hits.sum
    grand_total += day_total
    out.puts(["#{day.month}/#{day.day}", *hits, day_total].join(','))
  end

  out.puts(['total', *devices.map { |device| device_totals[device] }, grand_total].join(','))

  # With no hits at all, report 0.0 instead of dividing by zero (NaN).
  ratio = ->(n) { grand_total.zero? ? 0.0 : n.to_f / grand_total }
  shares = devices.map { |device| ratio.call(device_totals[device]) }
  out.puts(['ratio', *shares, ratio.call(grand_total)].join(','))
end

# Entry point: validates the arguments, tallies the logs, prints the report.
def main(argv)
  options = parse_options(argv)
  date_from, date_to = date_range(options)

  if date_from >= date_to
    p "date error!", date_from, date_to
    abort USAGE
  end
  abort USAGE if argv.empty?

  counts = tally_hits(argv, date_from, date_to)
  print_report(counts, date_from, date_to)
rescue SystemCallError => e
  # Missing or unreadable log file.
  abort "#{$PROGRAM_NAME}: #{e.message}"
end

main(ARGV) if __FILE__ == $PROGRAM_NAME