Part of Slepp's Projects: Pastebin, TURL, Imagebin, Filebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate
Sign In | Create Account

access_log Java
Sunday, January 3rd, 2010 at 5:36:31pm MST 

  1. package accesslog;
  2.  
  3. // based on http://www.wellho.net/resources/ex.php4?item=j714/Access.java
  4.  
  5. import java.io.BufferedReader;
  6. import java.io.FileReader;
  7. import java.io.IOException;
  8. import java.util.HashSet;
  9. import java.util.NoSuchElementException;
  10. import java.util.StringTokenizer;
  11.  
  12. /**
  13. * Access Log file (NCSA format) analysis
  14. */
  15. public class Access
  16. {
  17.   String host;
  18.   String time;
  19.   String request;
  20.   int status;
  21.   int size;
  22.   String referer;
  23.   String userAgent;
  24.  
  25.   // 200.70.150.125 - - [26/Oct/2009:01:42:33 +0100] "GET /favicon.ico HTTP/1.1" 404 183 "-" "Mozilla/5.0 (Windows; U; Windows NT 6.0; pt-BR; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3"
  26.  
  27.   public Access(String data)
  28.   {
  29.  
  30.     StringTokenizer splitter = new StringTokenizer(data, " \t");
  31.     String skip;
  32.  
  33.     host = splitter.nextToken();
  34.     skip = splitter.nextToken();
  35.     skip = splitter.nextToken("[");
  36.     time = splitter.nextToken(" \t");
  37.     skip = splitter.nextToken("\"");
  38.  
  39.     request = splitter.nextToken();
  40.     skip = splitter.nextToken(" \t");
  41.  
  42.     status = Integer.parseInt(splitter.nextToken(" \t"));
  43.  
  44.     try
  45.     {
  46.       size = Integer.parseInt(splitter.nextToken(" \t"));
  47.     }
  48.     catch (Exception e)
  49.     {
  50.       size = 0;
  51.     }
  52.  
  53.     skip = splitter.nextToken("\"");
  54.     referer = splitter.nextToken();
  55.     if (referer != null && !referer.equals("") && !referer.equals(" "))
  56.       // "-" means "no referer", but "" would result in the userAgent ending up in the skip
  57.     {
  58.       skip = splitter.nextToken();
  59.     }
  60.  
  61.     try
  62.     {
  63.       userAgent = splitter.nextToken();
  64.     }
  65.     catch (NoSuchElementException ex)
  66.     {
  67.       System.err.println("problem");
  68.     }
  69.  
  70.   }
  71.  
  72.   public static void main(String[] args) throws IOException
  73.   {
  74.     BufferedReader source = new BufferedReader(
  75.         new FileReader("/home/monika/Homepage/other/access_log/test/access_log_2009_w44-0"));
  76.  
  77.     int total = 0;
  78.     int msie = 0;
  79.     int firefox = 0;
  80.     int opera = 0; // some Opera also contain "MSIE"
  81.     int chrome = 0;
  82.     int safari = 0; // Chrome also contains "Safari"
  83.     int konqueror = 0;
  84.     int k_meleon = 0;
  85.     int seamonkey = 0;
  86.     int bot = 0;
  87.     int other = 0;
  88.     boolean found = false;
  89.     int windows = 0, linux = 0, apple = 0, otherOs = 0;
  90.     int mnenhy = 0;
  91.  
  92.     HashSet<String> hosts = new HashSet<String>();
  93.  
  94.     String line = "";
  95.     String agent = "";
  96.     while ((line = source.readLine()) != null)
  97.     {
  98.       Access access = new Access(line);
  99.  
  100.       if (hosts.contains(access.host))
  101.       {
  102.         continue;
  103.       }
  104.       hosts.add(access.host);
  105.       total++;
  106.  
  107.       // System.out.println(access.userAgent);
  108.       found = false;
  109.       agent = access.userAgent.toLowerCase();
  110.       if (agent.contains("firefox") || agent.contains("iceweasel") || agent.contains("shiretoko") || agent.contains("minefield"))
  111.       {
  112.         firefox++;
  113.         found = true;
  114.       }
  115.  
  116.       if (agent.contains("konqueror"))
  117.       {
  118.         if (found)
  119.         {
  120.           System.err.println("duplicate: " + access.userAgent);
  121.         }
  122.         konqueror++;
  123.         found = true;
  124.       }
  125.  
  126.       if (agent.contains("seamonkey"))
  127.       {
  128.         if (found)
  129.         {
  130.           System.err.println("duplicate: " + access.userAgent);
  131.         }
  132.         seamonkey++;
  133.         found = true;
  134.       }
  135.  
  136.       if (agent.contains("k-meleon"))
  137.       {
  138.         if (found)
  139.         {
  140.           System.err.println("duplicate: " + access.userAgent);
  141.         }
  142.         k_meleon++;
  143.         found = true;
  144.       }
  145.  
  146.       if (agent.contains("opera"))
  147.       {
  148.         if (found)
  149.         {
  150.           System.err.println("duplicate: " + access.userAgent);
  151.         }
  152.         opera++;
  153.         found = true;
  154.       }
  155.       // Opera 8 claims to be also MSIE
  156.       else if (agent.contains("msie"))
  157.       {
  158.         if (found)
  159.         {
  160.           System.err.println("duplicate: " + access.userAgent);
  161.         }
  162.         msie++;
  163.         found = true;
  164.       }
  165.  
  166.       if (agent.contains("chrome"))
  167.       {
  168.         if (found)
  169.         {
  170.           System.err.println("duplicate: " + access.userAgent);
  171.         }
  172.         chrome++;
  173.         found = true;
  174.       }
  175.       else if (agent.contains("safari"))
  176.       {
  177.         if (found)
  178.         {
  179.           System.err.println("duplicate: " + access.userAgent);
  180.         }
  181.         safari++;
  182.         found = true;
  183.       }
  184.      
  185.       if ((agent.contains("bot") && !agent.contains("surveybot")) || agent.contains("yandex") || agent.contains("catalog") || agent.contains("about.ask.com")
  186.           || agent.contains("slurp") || agent.contains("trend micro") || agent.contains("yahoocache") || agent.contains("archive") || agent.contains("spider")
  187.           || agent.contains("crawler"))
  188.       {
  189.         if (found)
  190.         {
  191.           System.err.println("duplicate: " + access.userAgent);
  192.         }
  193.         bot++;
  194.         found = true;
  195.       }
  196.       else
  197.       {
  198.         if (agent.contains("windows"))
  199.         {
  200.           windows++;
  201.         }
  202.         else if (agent.contains("linux"))
  203.         {
  204.           linux++;
  205.         }
  206.         else if (agent.contains("mac") )
  207.         {
  208.           apple++;
  209.         }
  210.         else
  211.         {
  212.           otherOs++;
  213.         }
  214.          
  215.       }
  216.  
  217.       if (!found)
  218.       {
  219.         other++;
  220.         // System.out.println("unknown: " + access.userAgent);
  221.       }
  222.      
  223.       if (agent.contains("mnenhy"))
  224.       {
  225.         mnenhy++;
  226.       }
  227.  
  228.     } // end of loop over the access_log file content
  229.  
  230.     System.out.println("total: " + total);
  231.     System.out.println("firefox: " + firefox);
  232.     System.out.println("msie: " + msie);
  233.     System.out.println("opera: " + opera);
  234.     System.out.println("chrome: " + chrome);
  235.     System.out.println("safari: " + safari);
  236.     System.out.println("konqueror: " + konqueror);
  237.     System.out.println("k_meleon: " + k_meleon);
  238.     System.out.println("seamonkey: " + seamonkey);
  239.     System.out.println("bot: " + bot);
  240.     System.out.println("other: " + other);
  241.    
  242.     System.out.println("---");
  243.    
  244.     System.out.println("windows: "+windows);
  245.     System.out.println("linux: "+linux);
  246.     System.out.println("apple: "+apple);
  247.     System.out.println("other OS: "+otherOs);
  248.    
  249.     System.out.println("---");
  250.    
  251.     System.out.println("mnenhy: "+mnenhy);
  252.  
  253.   }
  254. }

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will expire by default in one month. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

fantasy-obligation fantasy-obligation