Loading...

Parse user agent strings into structured data with Logstash, based on BrowserScope data

The Apache HTTP Server logs user agent strings. The user agent string contains information like family, operating system, version, and device. Logstash offers a filter plugin to parse this information.

Consider the following example log entry.

66.249.76.151 - - [04/Dec/2016:02:14:01 +0100] "GET /wp/docker-behind-proxy-with-cntlm/ HTTP/1.1" 200 14386 "-" "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"

Add the following useragent configuration to the filter section.

# Demo pipeline: read Apache combined-format access log lines from stdin,
# enrich each event (timestamp, GeoIP, reverse DNS, user agent) and print
# the resulting event to stdout.
input { stdin { } }
filter {
  # Split the raw line in "message" into named fields using the
  # COMBINEDAPACHELOG pattern. The later filters read the "timestamp",
  # "clientip" and "agent" fields.
  grok {
    match => { "message" => "%{COMBINEDAPACHELOG}" }
  }
  # Parse the Apache-style time in "timestamp" (e.g. 04/Dec/2016:02:14:01 +0100).
  # No target is given; in the sample output the parsed value appears in
  # @timestamp, shown in UTC.
  date {
    match => [ "timestamp" , "dd/MMM/yyyy:HH:mm:ss Z" ]
  }
  # Look up the client IP in a local GeoLite2 City database and store the
  # result under the "geoip" field.
  geoip {
    source => "clientip"
    target => "geoip"
    # Relative path - presumably resolved against the directory Logstash is
    # started from; confirm for your installation.
    database => "./GeoLite2-City.mmdb"
    # add_field is given twice on purpose: both values go to the same field,
    # so [geoip][coordinates] ends up as a two-element array in the order
    # longitude, latitude (see the sample output).
    add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
    add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}"  ]
  }
  mutate {
    # The coordinates were filled in via %{...} references above; convert
    # them to floats.
    convert => [ "[geoip][coordinates]", "float"]
    # Seed "dnsname" with the client IP so the dns filter below can replace
    # it while "clientip" itself stays untouched.
    add_field => { "dnsname" => "%{clientip}" }
  }
  # Reverse-resolve the IP held in "dnsname" and overwrite that field with
  # the resulting host name.
  dns {
    reverse => [ "dnsname" ]
    action => "replace"
  }
  # Parse the user agent string in "agent". No target is set; in the sample
  # output the parsed fields (name, os, os_name, os_major, os_minor, major,
  # minor, device) appear at the top level of the event.
  useragent {
    source => "agent"
  }
}
# Pretty-print every event to stdout with the rubydebug codec.
output { stdout { codec => "rubydebug" } }

Running this pipeline on the log entry above produces the following output:

{
        "request" => "/wp/docker-behind-proxy-with-cntlm/",
          "agent" => "\"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"",
          "minor" => "1",
           "auth" => "-",
          "ident" => "-",
       "os_minor" => "0",
       "os_major" => "6",
          "major" => "2",
       "clientip" => "66.249.76.151",
       "@version" => "1",
           "host" => "omega",
        "dnsname" => "crawl-66-249-76-151.googlebot.com",
      "timestamp" => "04/Dec/2016:02:14:01 +0100",
          "geoip" => {
              "timezone" => "America/Los_Angeles",
                    "ip" => "66.249.76.151",
              "latitude" => 37.419200000000004,
           "coordinates" => [
            [0] -122.0574,
            [1] 37.419200000000004
        ],
        "continent_code" => "NA",
             "city_name" => "Mountain View",
         "country_code2" => "US",
          "country_name" => "United States",
              "dma_code" => 807,
         "country_code3" => "US",
           "region_name" => "California",
              "location" => [
            [0] -122.0574,
            [1] 37.419200000000004
        ],
           "postal_code" => "94043",
             "longitude" => -122.0574,
           "region_code" => "CA"
    },
             "os" => "Android 6.0.1",
           "verb" => "GET",
        "message" => "66.249.76.151 - - [04/Dec/2016:02:14:01 +0100] \"GET /wp/docker-behind-proxy-with-cntlm/ HTTP/1.1\" 200 14386 \"-\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"",
       "referrer" => "\"-\"",
     "@timestamp" => 2016-12-04T01:14:01.000Z,
       "response" => "200",
          "bytes" => "14386",
           "name" => "Googlebot",
        "os_name" => "Android",
    "httpversion" => "1.1",
         "device" => "Spider"
}