// URLTools.java
package de.spieleck.net;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.BitSet;
/**
 * Static helper methods for a few recurring problems with URLs.
 * <p>
 * XXX This is work in progress, i.e. it works as far as it has been needed.
 * Parts of the code below were originally modified from Sun's JDK sources.
 * Several of the methods are really special-purpose String functions rather
 * than operations on the datatype {@link URL}.
 * <p>
 * The class only has static members and cannot be instantiated.
 */
public class URLTools
{
    /**
     * Encoding assumed by {@link #decode(String)}
     * (XXX should this be UTF-8?).
     */
    public static String defaultEncoding = "ISO-8859-1";

    /** Prefix recognised by the fallback path of {@link #getFileString(String)}. */
    protected final static String FILEPROTO = "file:/";

    /**
     * Kept so that existing references keep compiling; the current
     * implementation does not read this value.
     */
    static protected int MAXBYTESPERCHAR = 8;

    /**
     * Characters that {@link #encode(String)} copies through unchanged:
     * <code>a-z</code>, <code>A-Z</code>, <code>0-9</code> and
     * <code>- _ . *</code>.
     */
    static protected BitSet dontNeedEncoding;

    static
    {
        dontNeedEncoding = new BitSet(256);
        int i;
        for (i = 'a'; i <= 'z'; i++)
            dontNeedEncoding.set(i);
        for (i = 'A'; i <= 'Z'; i++)
            dontNeedEncoding.set(i);
        for (i = '0'; i <= '9'; i++)
            dontNeedEncoding.set(i);
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');
    }

    /**
     * Do not construct this :-)
     */
    private URLTools() { }

    /**
     * Percent-encodes a string using the platform default character
     * encoding.
     * <p>
     * This is similar to <code>java.net.URLEncoder</code> <b>but</b> it does
     * not produce "<code>x-www-form-urlencoded</code>" output: a space is
     * written as <code>%20</code>, never as <code>+</code>.
     * <ul>
     * <li>The ASCII characters '<code>a</code>' through '<code>z</code>',
     * '<code>A</code>' through '<code>Z</code>', '<code>0</code>' through
     * '<code>9</code>', and ".", "-", "*", "_" remain the same.
     * <li>Every other character is converted to bytes and each byte is
     * written as "<code>%<i>xy</i></code>", where <i>xy</i> is the two-digit
     * lower-case hexadecimal value of that byte.
     * </ul>
     * Because the byte conversion uses the platform default encoding, the
     * result for non-ASCII input depends on the runtime environment; use
     * {@link #encode(String, String)} for reproducible output.
     *
     * @param s <code>String</code> to be translated.
     * @return the translated <code>String</code>.
     */
    public static String encode(String s)
    {
        try
        {
            return encode(s, null);
        }
        catch (UnsupportedEncodingException e)
        {
            // A null encoding name selects the platform default, which
            // never goes through the name lookup that raises this.
            throw new IllegalStateException(e.toString());
        }
    }

    /**
     * Percent-encodes a string using an explicitly named character encoding.
     * The set of characters left untouched and the output format are the
     * same as for {@link #encode(String)}.
     *
     * @param s <code>String</code> to be translated.
     * @param enc name of the encoding used to turn characters into bytes;
     *            <code>null</code> selects the platform default.
     * @return the translated <code>String</code>.
     * @exception UnsupportedEncodingException
     *            if the named encoding is not supported and <code>s</code>
     *            contains at least one character that needs escaping.
     */
    public static String encode(String s, String enc)
        throws UnsupportedEncodingException
    {
        int n = s.length();
        StringBuffer out = new StringBuffer(3 * n);
        int i = 0;
        while (i < n)
        {
            char c = s.charAt(i);
            if (dontNeedEncoding.get(c))
            {
                out.append(c);
                i++;
                continue;
            }
            // Convert a whole run of characters at once so that surrogate
            // pairs reach the encoder together.
            int start = i;
            while (i < n && !dontNeedEncoding.get(s.charAt(i)))
                i++;
            String run = s.substring(start, i);
            byte[] raw = (enc == null) ? run.getBytes() : run.getBytes(enc);
            for (int j = 0; j < raw.length; j++)
            {
                out.append('%');
                out.append(Character.forDigit((raw[j] >> 4) & 0xF, 16));
                out.append(Character.forDigit(raw[j] & 0xF, 16));
            }
        }
        return out.toString();
    }

    /**
     * Currently a quick hack to encode certain special characters
     * in a URL's query string part: every space is replaced by
     * <code>+</code>, nothing else is touched.
     * XXX Read the RFCs and do it the right way.
     *
     * @param s the query string.
     * @return <code>s</code> with spaces replaced by <code>+</code>.
     */
    public static String encodeQueryString(String s)
    {
        return s.replace(' ', '+');
    }

    /**
     * Ugly method for the hopefully most compatible way to
     * create a <code>file</code> URL from a local file name.
     * Is similar to java.io.File.toURL in JDK1.2+.
     * <p>
     * The absolute path of the file is taken, the platform separator is
     * replaced by <code>/</code>, a leading <code>/</code> is added if
     * missing, and a trailing <code>/</code> is added if the file is an
     * existing directory.
     *
     * @param fname local file name.
     * @return a URL with protocol <code>file</code> and an empty host.
     * @exception MalformedURLException
     *            if the URL constructor rejects the result.
     */
    public static URL toURL(String fname)
        throws MalformedURLException
    {
        File fi = new File(fname);
        String path = fi.getAbsolutePath();
        if (File.separatorChar != '/')
            path = path.replace(File.separatorChar, '/');
        if (!path.startsWith("/"))
            path = "/" + path;
        if (!path.endsWith("/") && fi.isDirectory())
            path = path + "/";
        return new URL("file", "", path);
    }

    /**
     * XXX Obtains the path of a file URL.
     * This method is necessary due to a missing
     * constructor File(URL url) in java.io.File (JDK1.3.1).
     *
     * @param url the URL to inspect.
     * @return the path component if the protocol is <code>file</code>,
     *         otherwise the external form of the whole URL.
     */
    public static String getFileString(URL url)
    {
        if ("file".equals(url.getProtocol()))
            return url.getPath();
        return url.toExternalForm();
    }

    /**
     * Alternative call with String argument.
     * <p>
     * If the string cannot be parsed as a URL, a leading
     * <code>file:/</code> followed by any number of additional slashes is
     * reduced so that exactly one leading slash remains; any other
     * unparseable string is returned unchanged.
     *
     * @param url textual URL or plain file name.
     * @return see {@link #getFileString(URL)} for parseable input.
     */
    public static String getFileString(String url)
    {
        try
        {
            return getFileString(new URL(url));
        }
        catch (MalformedURLException e)
        {
            // I've seen file:/ and file:// and file:///
            // in the real world!
            // XXX On the other hand in the *X-world any leading
            // "/" has a meaning, of root, so one should NOT eliminate
            // everything :-(
            if (!url.startsWith(FILEPROTO))
                return url;
            int len = FILEPROTO.length();
            // The length guard keeps the look-ahead inside the string
            // when it ends in a run of slashes.
            while (len + 1 < url.length()
                    && url.charAt(len) == '/'
                    && url.charAt(len + 1) == '/')
                len++;
            return url.substring(len);
        }
    }

    /**
     * Decodes a String from <code>application/x-www-form-urlencoded</code>
     * using {@link #defaultEncoding}.
     *
     * @param s the <code>String</code> to decode.
     * @return the decoded string, or <code>s</code> unchanged if
     *         {@link #defaultEncoding} is not supported.
     */
    public static String decode(String s)
    {
        try
        {
            return decode(s, defaultEncoding);
        }
        catch (UnsupportedEncodingException e)
        {
            return s;
        }
    }

    /**
     * Decodes a <code>application/x-www-form-urlencoded</code> string with
     * the specified encoding.
     * <ul>
     * <li><code>+</code> becomes a space.
     * <li>Each maximal run of <code>%<i>xy</i></code> escapes is collected
     * into bytes and converted to characters with <code>enc</code>.
     * <li>A <code>%</code> that is not followed by two hexadecimal digits
     * (malformed, or cut off at the end of the input) is copied through
     * literally instead of raising an error.
     * <li>Everything else is copied unchanged.
     * </ul>
     *
     * @param s the <code>String</code> to decode
     * @param enc the name of a supported encoding.
     * @return the newly decoded <code>String</code>; <code>s</code> itself
     *         if it contains neither <code>+</code> nor <code>%</code>.
     * @exception UnsupportedEncodingException
     *            if escapes have to be converted and the named encoding
     *            is not supported
     */
    public static String decode(String s, String enc)
        throws UnsupportedEncodingException
    {
        if (s.indexOf('+') < 0 && s.indexOf('%') < 0)
            return s;
        int numChars = s.length();
        StringBuffer sb = new StringBuffer(numChars);
        ByteArrayOutputStream run = new ByteArrayOutputStream();
        int i = 0;
        while (i < numChars)
        {
            char ch = s.charAt(i);
            if (ch == '+')
            {
                sb.append(' ');
                i++;
            }
            else if (ch != '%')
            {
                sb.append(ch);
                i++;
            }
            else
            {
                // Gather consecutive escapes so that multi-byte
                // sequences are converted as one unit.
                run.reset();
                while (i + 2 < numChars && s.charAt(i) == '%')
                {
                    int hi = hexValue(s.charAt(i + 1));
                    int lo = hexValue(s.charAt(i + 2));
                    if (hi < 0 || lo < 0)
                        break;
                    run.write((hi << 4) | lo);
                    i += 3;
                }
                if (run.size() > 0)
                {
                    sb.append(run.toString(enc));
                }
                else
                {
                    // Not a valid escape: keep the '%' and let the
                    // following characters be handled as ordinary text.
                    sb.append('%');
                    i++;
                }
            }
        }
        return sb.toString();
    }

    /** Returns the value of an ASCII hexadecimal digit, or -1 if c is not one. */
    private static int hexValue(char c)
    {
        if (c >= '0' && c <= '9')
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return c - 'a' + 10;
        if (c >= 'A' && c <= 'F')
            return c - 'A' + 10;
        return -1;
    }
}
//
// Jacson - Text Filtering with Java.
// Copyright (C) 2002 Frank S. Nestel (nestefan -at- users.sourceforge.net)
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// URLTools.java