とあるURLが外からアクセスされた数と、そのユーザーエージェントを知りたい

http://d.hatena.ne.jp/yfp/20040818#p2 のやつ。HDDに埋もれると嫌だから、貼っとく。

#! /usr/bin/ruby

require 'getopts'
require 'time'
require 'date'

# Command-line handling: works out the reporting window (date_from, date_to)
# and makes sure at least one log file was named.
#
#   -d            report on yesterday only (overrides --from / --to)
#   --from=DATE   start of the window (default: the 1st of the current month)
#   --to=DATE     end of the window   (default: today at 00:00)
#
# The window must be non-empty: date_from has to be strictly before date_to.
#
# NOTE(review): `getopts` is the legacy option parser that fills in the
# $OPT_* globals; confirm it is available on the Ruby this runs under.
usage = "usage: #$0 [-d] [--from=2xxx-xx-xx] [--to=2xxx-xx-xx] log_file1 log_file2..."

abort usage unless getopts('d', 'from:', 'to:')

# Local midnight at the start of the given Date.
midnight = lambda { |day| Time.local(day.year, day.month, day.day) }
today = Date.today

begin
  date_from = $OPT_from ? Time.parse($OPT_from) : Time.local(today.year, today.month, 1)
  date_to = $OPT_to ? Time.parse($OPT_to) : midnight.call(today)
rescue ArgumentError => e
  # Time.parse raises ArgumentError on text it cannot read as a date; show
  # the usage line instead of a backtrace.
  abort "invalid date: #{e.message}\n#{usage}"
end

if $OPT_d
  date_from = midnight.call(today - 1)
  date_to = midnight.call(today)
end

# Diagnostics go to stderr (via abort) so they never mix with the report on stdout.
abort "date error! from=#{date_from} to=#{date_to}\n#{usage}" if date_from >= date_to

abort usage if ARGV.empty?

# Lookup table of [device code, model name] pairs, meant to be searched with
# Array#assoc on the device code.
#
# Order matters: Array#assoc returns the FIRST pair whose code matches, so for
# the codes listed twice (CA23, TS25) only the earlier entry is ever returned.
# NOTE(review): the shadowed entries ("A5401CA", "A1304T") may have been meant
# for different codes — confirm against the carrier's device list.
#
# Frozen so the table cannot be modified by accident at runtime.
DEVICE = [["SA31", "W21SA"],
          ["SN31", "W21S"],
          ["KC32", "W21K"],
          ["HI32", "W21H"],
          ["KC31", "W11K"],
          ["HI31", "W11H"],
          ["ST22", "INFOBAR"],
          ["TS28", "A5506T"],
          ["SA27", "A5505SA"],
          ["TS27", "A5504T"],
          ["SA26", "A5503SA"],
          ["KC24", "A5502K"],
          ["KC25", "A5502K"],
          ["TS26", "A5501T"],
          ["CA26", "A5407CA"],
          ["CA25", "A5406CA"],
          ["ST23", "A5405SA"],
          ["SN25", "A5404S"],
          ["CA24", "A5403CA"],
          ["SN24", "A5402S"],
          ["CA23", "A5401CA II"],
          ["CA23", "A5401CA"],
          ["ST21", "A5306ST"],
          ["KC22", "A5305K"],
          ["TS24", "A5304T"],
          ["HI24", "A5303H II"],
          ["HI23", "A5303H"],
          ["CA22", "A5302CA"],
          ["TS23", "A5301T"],
          ["TS21", "C5001T"],
          ["SN26", "A1402S"],
          ["KC23", "A1401K"],
          ["SA28", "A1305SA"],
          ["TS25", "A1304T II"],
          ["TS25", "A1304T"],
          ["SA25", "A1303SA"],
          ["SA24", "A1302SA"],
          ["SN23", "A1301S"],
          ["SN22", "A1101S"],
          ["SA22", "A3015SA"],
          ["SN21", "A3014S"],
          ["TS22", "A3013T"],
          ["CA21", "A3012CA"],
          ["SA21", "A3011SA"],
          ["MA21", "C3003P"],
          ["KC21", "C3002K"],
          ["HI21", "C3001H"],
          ].freeze
# Patterns used to pick an access-log line apart.
#
# LOG_FORMAT matches one whole log line. Capture groups:
#   1: the text between [ and ]   (the timestamp)
#   2: the first quoted field     (the request line)
#   3: the second quoted field    (the referer)
#   4: the third quoted field     (the user agent)
#
# Variant for logs that carry one more quoted field (subscriber id) at the end:
#with subscriber_id
#LOG_FORMAT = /\A\S+ \S+ \S+ \[([^\]]+)\] "([^"]+)" \S+ \S+ "([^"]+)" "([^"]+)" "([^"]+)".*\Z/
LOG_FORMAT = /\A\S+ \S+ \S+ \[([^\]]+)\] "([^"]+)" \S+ \S+ "([^"]+)" "([^"]+)".*\Z/

# Timestamp such as "18/Aug/2004:12:34:56"; captures day, month abbreviation,
# year, hour, minute, second. Anything after the seconds is ignored.
TIME_FORMAT = %r!\A(\d\d)/([A-Z][a-z][a-z])/(\d\d\d\d):(\d\d):(\d\d):(\d\d)!
# Month abbreviations in calendar order; index + 1 is the month number.
MONTH = %w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec).freeze

# Request lines that count as a hit: GET of /ez/ or /ez/index.html only.
TARGET_REQUEST = /\AGET \/ez\/(index\.html)? HTTP.*\Z/

# User agents of the form "KDDI-<code> <token> (GUI) <token>..."; group 1 is
# the device code.
EZ_USER_AGENT_FORMAT = /\AKDDI-(\S+) \S+ \(GUI\) \S+.*\Z/

# One parsed line of the access log.
#
# The line is split with LOG_FORMAT; the timestamp is converted to a local
# Time using TIME_FORMAT and MONTH. subscriber_id is filled from a fifth
# capture group of LOG_FORMAT and is nil when the pattern has none.
#
# A line that does not match LOG_FORMAT, or whose timestamp cannot be turned
# into a valid Time, yields a record whose attributes are all nil instead of
# raising, so one malformed line cannot abort the whole run.
class AccessLog
  attr_accessor :date, :request, :referer, :user_agent, :subscriber_id

  # s:: one raw line of the log file (a trailing newline is fine).
  def initialize(s)
    _, stamp, *fields = LOG_FORMAT.match(s).to_a
    @date = parse_date(stamp)
    # Without a usable date the record cannot be placed in any reporting
    # window, so treat the whole line as unparsed.
    fields = [] unless @date
    @request, @referer, @user_agent, @subscriber_id = fields
  end

  private

  # Converts a "dd/Mon/yyyy:hh:mm:ss..." stamp to a local Time, or returns nil
  # when the stamp is missing, malformed, or names an impossible date.
  def parse_date(stamp)
    m = stamp && TIME_FORMAT.match(stamp)
    month = m && MONTH.index(m[2])
    return nil unless month
    Time.local(m[3].to_i, month + 1, m[1].to_i, m[4].to_i, m[5].to_i, m[6].to_i)
  rescue ArgumentError
    # Time.local rejects out-of-range components (e.g. day 99).
    nil
  end
end


# Referers whose hits are NOT counted (gwmj.jp, emkn.com, www.emkn.com).
# NOTE(review): presumably these are the site's own hosts, so that only
# visits arriving from outside are tallied — confirm.
INTERNAL_REFERER = %r!\Ahttp://(gwmj\.jp|(www\.)?emkn\.com).*\Z!

# Model name for a device code taken from the user agent. Codes missing from
# DEVICE are reported under the raw code instead of raising on a nil lookup.
def device_label(code)
  entry = DEVICE.assoc(code)
  entry ? entry[1] : code
end

# Read every line of every log file named on the command line.
logs = []

ARGV.each do |filename|
  # File.foreach always treats the argument as a path.
  File.foreach(filename) do |line|
    logs << AccessLog.new(line)
  end
end

# count[model name]["month/day"] => number of hits; missing days read as 0.
count = {}

logs.each do |log|
  # The guards run in this order on purpose: a record with nil fields fails
  # the request check and is skipped before its date is compared.
  next if log.referer =~ INTERNAL_REFERER
  next unless log.request =~ TARGET_REQUEST
  next if log.date < date_from || log.date >= date_to

  code = EZ_USER_AGENT_FORMAT.match(log.user_agent).to_a[1]
  next unless code

  day = "#{log.date.month}/#{log.date.day}"
  per_day = (count[device_label(code)] ||= Hash.new(0))
  per_day[day] += 1
end

# print section
#
# Writes the tally to stdout as CSV:
#   header row : an empty first cell, one column per device (sorted), "total"
#   one row per day from date_from up to, but not including, date_to,
#                labelled "month/day"
#   "total" row: per-device sums and the grand total
#   "ratio" row: each device's share of the grand total (NaN when there
#                were no hits at all, because that divides 0.0 by 0)

keys = count.keys.sort
puts(([""] + keys + ["total"]).join(","))

f = Date.new(date_from.year, date_from.month, date_from.day)
t = Date.new(date_to.year, date_to.month, date_to.day)
device_total = Hash.new(0)
f.upto(t - 1) do |day|
  d = "#{day.month}/#{day.day}"
  hits = keys.map { |device| count[device][d] }
  day_total = hits.inject(0) { |sum, n| sum + n }
  keys.each_with_index { |device, i| device_total[device] += hits[i] }
  device_total["total"] += day_total
  row = [d] + hits + [day_total]
  puts row.join(",")
end

grand_total = device_total["total"]
totals = keys.map { |device| device_total[device] }

total_row = ["total"] + totals + [grand_total]
puts total_row.join(",")

ratio_row = ["ratio"] + (totals + [grand_total]).map { |n| n.to_f / grand_total }
puts ratio_row.join(",")