HOME - Recent Changes - Search:

Academic Work


Personal

* pot de départ


dblp


(:twitter:)

-----

[ edit | logout ]
[ help | sandbox | passwd ]

Get the contents of a URL as a String

#############################

Update: below this class there is a patch that shows how to set the user agent. It's very useful if a page is "protected" against downloaders.

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

/**
 * Get a file from the Internet.
 *
 * <p>The content of a URL can be fetched either as one string
 * ({@link #getURL(String)}) or as an array of lines
 * ({@link #getURLasLines(String)}). {@link #checkHttpUrl(String)} tells
 * whether an HTTP URL answers with status 200.
 *
 * <p>Note: the fetching methods terminate the JVM with exit status -1 when
 * the URL is malformed or a network/I/O error occurs.
 *
 * @author jabba
 */
public class MyGetUrl
{
   /**
    * The URL most recently passed to {@link #getURL(String)}.
    */
   private URL url;

   /**
    * Get the content of the given URL as a string.
    *
    * @param url The URL we want to get.
    * @return The content of the URL as a string.
    */
   public String getURL(String url)
   {
      this.url = this.parseURLString(url);
      URLConnection urlc = this.getURLConnection(this.url);
      return getContent(urlc);
   }

   /**
    * Gets the content of the given URL as an array of strings.
    *
    * <p>Each of "\r\n", "\r" and "\n" counts as exactly one line terminator,
    * so Windows-style line endings do not produce an extra empty element
    * after every line. Empty lines that are really present in the content
    * are kept; trailing empty lines are dropped (see {@link String#split(String)}).
    *
    * @param address The URL of the file we want to read.
    * @return An array of strings containing the lines of the given file (URL).
    */
   public String[] getURLasLines(String address)
   {
      String all = this.getURL(address);
      // "\r\n" must come first in the alternation so that a CRLF pair is
      // consumed as a single separator instead of two.
      return all.split("\r\n|\r|\n");
   }

   /**
    * It can check if a given URL is alive or not.
    *
    * <p>Unlike the fetching methods, this method never terminates the
    * program: a malformed address, a non-HTTP URL and any I/O error are all
    * reported as {@code false}.
    *
    * @param address URL address that we want to check if it's available or not.
    * @return True, if the server answered with HTTP status 200. False, otherwise.
    */
   public boolean checkHttpUrl(String address)
   {
      HttpURLConnection huc = null;
      try
      {
         // MalformedURLException is an IOException, so a bad address ends up
         // in the catch block below and yields false.
         URLConnection connection = new URL(address).openConnection();
         if (!(connection instanceof HttpURLConnection))
         {
            // e.g. "file:" or "jar:" URLs - there is no HTTP status to check
            return false;
         }
         huc = (HttpURLConnection) connection;
         return huc.getResponseCode() == HttpURLConnection.HTTP_OK;
      }
      catch (IOException ioe)
      {
         // unreachable host, refused connection, malformed URL, ...
         return false;
      }
      finally
      {
         if (huc != null)
         {
            huc.disconnect();
         }
      }
   }

   /*
    * =======================================================================
    *                and here come the private functions
    * =======================================================================
    */

   /**
    * Get the content of a URL as a string.
    *
    * <p>The stream is read until end-of-stream (the reported content length
    * is not relied upon) and is always closed afterwards. The bytes are
    * decoded with the charset chosen by {@link #detectCharset(URLConnection)}.
    * On an I/O error the program terminates with exit status -1.
    *
    * @param urlc URLConnection object the URL we want to read.
    * @return String content of the given URL.
    */
   private String getContent(URLConnection urlc)
   {
      try
      {
         InputStream is = urlc.getInputStream();
         try
         {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int count = is.read(chunk);
            while (count != -1)
            {
               collected.write(chunk, 0, count);
               count = is.read(chunk);
            }
            return new String(collected.toByteArray(), detectCharset(urlc));
         }
         finally
         {
            is.close();
         }
      }
      catch (IOException e)
      {
         System.err.println("Error: I/O error while trying to read the network file!");
         System.exit(-1);
      }
      return null;          // it cannot arrive here
   }

   /**
    * Chooses the charset used to decode the content of a connection.
    *
    * <p>If the content type reported by the connection carries a usable
    * {@code charset=} parameter, that charset is returned. Otherwise the
    * platform default charset is used.
    *
    * @param urlc The connection whose content type is inspected.
    * @return The charset to decode the content with, never {@code null}.
    */
   private static Charset detectCharset(URLConnection urlc)
   {
      String contentType = urlc.getContentType();
      if (contentType != null)
      {
         String[] parts = contentType.split(";");
         // parts[0] is the media type itself, parameters start at index 1
         for (int i = 1; i < parts.length; i++)
         {
            String param = parts[i].trim();
            if (param.regionMatches(true, 0, "charset=", 0, "charset=".length()))
            {
               String name = param.substring("charset=".length()).trim();
               if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\""))
               {
                  name = name.substring(1, name.length() - 1);
               }
               try
               {
                  return Charset.forName(name);
               }
               catch (IllegalArgumentException ignored)
               {
                  // illegal or unsupported charset name: keep looking, then
                  // fall back to the default below
               }
            }
         }
      }
      return Charset.defaultCharset();
   }


   /**
    * Checks if the URL is well-formed.
    *
    * @param address The URL we want to check.
    * @return The URL itself if it is correct. If it's not correct, the program terminates.
    */
   private URL parseURLString(String address)
   {
      URL url = null;

      try {
         url = new URL(address);
      }
      catch (MalformedURLException e)
      {
         System.err.println("Error: the URL \""+address+"\" is malformed!");
         System.exit(-1);
      }
      return url;
   }

   /**
    * Gets a URLConnection object towards the given URL.
    *
    * <p>The connection is opened and connected before it is returned. If
    * that fails, the program terminates with exit status -1.
    *
    * @param url URL object representation of the file.
    * @return A URLConnection object towards the given URL.
    */
   private URLConnection getURLConnection(URL url)
   {
      URLConnection myUC = null;

      try {
         myUC = url.openConnection();
         myUC.connect();
      }
      catch (Exception e)
      {
         System.err.println("Error: network error while trying to open network connection!");
         System.exit(-1);
      }
      return myUC;
   }

} // class MyGetUrl

Setting the user agent

You need the following modifications in the code above:

/**
 * Get the content of the given URL as a string (user-agent aware variant).
 *
 * The only difference from the original method: the connection is obtained
 * from getHttpURLConnection() instead of getURLConnection().
 *
 * @param url The URL we want to get.
 * @return The content of the URL as a string.
 */
public String getURL(String url)
{
      this.url = this.parseURLString(url);
      // was: this.getURLConnection(this.url)
      final URLConnection connection = this.getHttpURLConnection(this.url);   // change
      return getContent(connection);
}

/**
 * Gets a connected HTTP connection towards the given URL, with the
 * "User-agent" request property set to a browser-like value.
 *
 * If opening or connecting fails, an error is printed and the program
 * terminates with exit status -1.
 *
 * @param url URL object representation of the file.
 * @return A connection towards the given URL.
 */
private URLConnection getHttpURLConnection(URL url)
{
      final String userAgent =
             "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.5) Gecko/20091102 Firefox/3.5.5";   // change
      HttpURLConnection connection = null;   // change

      try
      {
         connection = (HttpURLConnection) url.openConnection();   // change
         // the request property has to be set before connect() is called
         connection.setRequestProperty("User-agent", userAgent);   // change
         connection.connect();
      }
      catch (Exception e)
      {
         System.err.println("Error: network error while trying to open network connection!");
         System.exit(-1);
      }
      return connection;
}
Cloud City


anime | bash | blogs | bsd | c/c++ | c64 | calc | comics | convert | cube | del.icio.us | digg | east | eBooks | egeszseg | elite | firefox | flash | fun | games | gimp | google | groovy | hardware | hit&run | howto | java | javascript | knife | lang | latex | liferay | linux | lovecraft | magyar | maths | movies | music | p2p | perl | pdf | photoshop | php | pmwiki | prog | python | radio | recept | rts | scala | scene | sci-fi | scripting | security | shell | space | súlyos | telephone | torrente | translate | ubuntu | vim | wallpapers | webutils | wikis | windows


Blogs and Dev.

* Ubuntu Incident
* Python Adventures
* me @ GitHub


Places

Debrecen | France | Hungary | Montreal | Nancy


Notes

full circle | km


Hobby Projects

* Jabba's Codes
* PmWiki
* Firefox
* PHP
* JavaScript
* Scriptorium
* Tutorials
* me @ GitHub


Quick Links


[ edit ]

View - Edit - History - Attach - Print *** Report - Recent Changes - Search
Page last modified on 2010 May 04, 02:34