- access_log Java
- Sunday, January 3rd, 2010 at 5:36:31pm MST
- package accesslog;
- // based on http://www.wellho.net/resources/ex.php4?item=j714/Access.java
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.HashSet;
- import java.util.NoSuchElementException;
- import java.util.StringTokenizer;
- /**
- * Access Log file (NCSA format) analysis.
- *
- * An instance holds the fields taken from one access-log line (host, time,
- * request, status, size, referer, user agent). The second block of the class
- * reads a log line by line and, for the first line seen from each distinct
- * host, increments counters per browser family, for bots, and per operating
- * system, based on substrings of the lower-cased user agent.
- *
- * NOTE(review): this listing appears to have lost lines when it was pasted.
- * Not present in the visible code: the header of the parsing block
- * (presumably a constructor taking the log line, since the loop below calls
- * new Access(line)), the declarations of "splitter" and "source", the bodies
- * of one try block and of every "if (found)", the catch headers, the header
- * of the counting block, and any output of the counters. Confirm against the
- * original file before relying on this code.
- */
- public class Access
- {
- String host; // first token of the log line
- String time; // token read right after the text up to "[" has been skipped
- String request; // text read with the double quote as delimiter (the quoted request)
- int status; // NOTE(review): no assignment to status is visible in this listing
- int size; // the only visible assignment is the fallback size = 0
- String referer; // text read with the double quote as delimiter (the quoted referer)
- String userAgent; // last token read; assigned inside a try block
- // 200.70.150.125 - - [26/Oct/2009:01:42:33 +0100] "GET /favicon.ico HTTP/1.1" 404 183 "-" "Mozilla/5.0 (Windows; U; Windows NT 6.0; pt-BR; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3"
- { // NOTE(review): block header missing, presumably the constructor Access(String line); "splitter" is not declared in the visible code
- String skip; // receives tokens that are read only to be discarded
- host = splitter.nextToken();
- skip = splitter.nextToken(); // discard the second token
- skip = splitter.nextToken("["); // switch the delimiter to "[" and discard the text in front of it
- time = splitter.nextToken(" \t"); // back to space/tab delimiters for the time token
- skip = splitter.nextToken("\""); // switch the delimiter to the double quote and discard the text before the request
- request = splitter.nextToken(); // delimiter is still the double quote here
- skip = splitter.nextToken(" \t"); // back to space/tab delimiters; this token is discarded
- try
- { // NOTE(review): try body is empty in this listing; presumably the numeric parsing was lost
- }
- { // NOTE(review): catch header missing; only the fallback assignment survives
- size = 0;
- }
- skip = splitter.nextToken("\""); // double quote as delimiter again; discard the text before the referer
- referer = splitter.nextToken();
- if (referer != null && !referer.equals("") && !referer.equals(" "))
- // "-" means "no referer"; for an empty referer ("") the extra skip below is suppressed, because it would otherwise swallow the userAgent
- {
- skip = splitter.nextToken(); // discard the token between referer and user agent
- }
- try
- {
- userAgent = splitter.nextToken();
- }
- { // NOTE(review): catch header missing (NoSuchElementException is imported, presumably for this); handler body is empty
- }
- }
- { // NOTE(review): block header missing, presumably the program entry point; "source" is not declared in the visible code
- int total = 0; // number of distinct hosts analysed
- int msie = 0;
- int firefox = 0;
- int opera = 0; // some Opera also contain "MSIE"
- int chrome = 0;
- int safari = 0; // Chrome also contains "Safari"
- int konqueror = 0;
- int k_meleon = 0;
- int seamonkey = 0;
- int bot = 0;
- int other = 0; // agents that matched none of the browser/bot checks
- boolean found = false; // true once the current agent matched a browser or bot check
- int windows = 0, linux = 0, apple = 0, otherOs = 0; // operating-system counters, non-bot agents only
- int mnenhy = 0; // agents containing "mnenhy"
- HashSet<String> hosts = new HashSet<String>(); // hosts already counted
- String line = "";
- String agent = "";
- while ((line = source.readLine()) != null) // loop ends when readLine() returns null
- {
- Access access = new Access(line);
- if (hosts.contains(access.host)) // only the first line of each host is analysed
- {
- continue;
- }
- hosts.add(access.host);
- total++;
- // System.out.println(access.userAgent);
- found = false;
- agent = access.userAgent.toLowerCase(); // lower-cased so the substring checks are case-insensitive; NOTE(review): userAgent is only assigned inside a try block, confirm it cannot be null here
- if (agent.contains("firefox") || agent.contains("iceweasel") || agent.contains("shiretoko") || agent.contains("minefield"))
- {
- firefox++;
- found = true;
- }
- if (agent.contains("konqueror"))
- {
- if (found) // NOTE(review): body is empty in this listing; presumably a statement for agents matching more than one check was lost
- {
- }
- konqueror++;
- found = true;
- }
- if (agent.contains("seamonkey"))
- {
- if (found) // empty body in this listing, as above
- {
- }
- seamonkey++;
- found = true;
- }
- if (agent.contains("k-meleon"))
- {
- if (found) // empty body in this listing, as above
- {
- }
- k_meleon++;
- found = true;
- }
- if (agent.contains("opera"))
- {
- if (found) // empty body in this listing, as above
- {
- }
- opera++;
- found = true;
- }
- // Opera 8 claims to be also MSIE
- else if (agent.contains("msie")) // counted as MSIE only when the agent does not contain "opera"
- {
- if (found) // empty body in this listing, as above
- {
- }
- msie++;
- found = true;
- }
- if (agent.contains("chrome"))
- {
- if (found) // empty body in this listing, as above
- {
- }
- chrome++;
- found = true;
- }
- else if (agent.contains("safari")) // counted as Safari only when the agent does not contain "chrome"
- {
- if (found) // empty body in this listing, as above
- {
- }
- safari++;
- found = true;
- }
- if ((agent.contains("bot") && !agent.contains("surveybot")) || agent.contains("yandex") || agent.contains("catalog") || agent.contains("about.ask.com")
- || agent.contains("slurp") || agent.contains("trend micro") || agent.contains("yahoocache") || agent.contains("archive") || agent.contains("spider")
- || agent.contains("crawler"))
- { // bot keywords matched; "bot" alone does not count when the agent contains "surveybot"
- if (found) // empty body in this listing, as above
- {
- }
- bot++;
- found = true;
- }
- else
- { // operating system is counted only for agents not classified as bots
- if (agent.contains("windows"))
- {
- windows++;
- }
- else if (agent.contains("linux"))
- {
- linux++;
- }
- else if (agent.contains("mac") )
- {
- apple++;
- }
- else
- {
- otherOs++;
- }
- }
- if (!found)
- {
- other++;
- // System.out.println("unknown: " + access.userAgent);
- }
- if (agent.contains("mnenhy")) // counted independently of the browser/bot classification
- {
- mnenhy++;
- }
- } // end of loop over the access_log file content
- } // NOTE(review): none of the counters is printed or returned in the visible code; the reporting statements were presumably lost
- }
advertising
Update the Post
Either update this post and resubmit it with changes, or make a new post.
You may also comment on this post.
Please note that information posted here expires by default after one month. If you do not want it to expire, please set the expiry time above. A post that is set to expire cannot be indexed by web search engines before it expires; posts that are not marked to expire can be indexed by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.