URLTools.java |
package de.spieleck.net; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.util.BitSet; /** * XXX This is work in progress, i.e. works as long as needed so far. * * Part of below code has been modified from Suns JDK Sources :-( * Static methods to help certain problems with urls. * Some of these are rather special String functions than * actually working with the datatype URL. */ public class URLTools { /** Default assumption for urlDecode (XXX should be use UTF-8?) */ public static String defaultEncoding = "ISO-8859-1"; protected final static String FILEPROTO = "file:/"; static protected int MAXBYTESPERCHAR = 8; static protected BitSet dontNeedEncoding; static { dontNeedEncoding = new BitSet(256); int i; for (i = 'a'; i <= 'z'; i++) dontNeedEncoding.set(i); for (i = 'A'; i <= 'Z'; i++) dontNeedEncoding.set(i); for (i = '0'; i <= '9'; i++) dontNeedEncoding.set(i); dontNeedEncoding.set('-'); dontNeedEncoding.set('_'); dontNeedEncoding.set('.'); dontNeedEncoding.set('*'); } /** * Do not construct this :-) */ private URLTools() { } /* * This class is similar to java.net.URLEncoder <B>but</B> * the <code>encode()</code>-method encodes * according to <code>RFC2396</CODE> instead of * into a MIME format called * "<code>x-www-form-urlencoded</code>" format. * <p> * To convert a <code>String</code>, each character is examined in turn: * <ul> * <li>The ASCII characters '<code>a</code>' through '<code>z</code>', * '<code>A</code>' through '<code>Z</code>', '<code>0</code>' * through '<code>9</code>', and ".", "-", * "*", "_" remain the same. * <li>All other characters are converted into the 3-character string * "<code>%<i>xy</i></code>", where <i>xy</i> is the two-digit * hexadecimal representation of the lower 8-bits of the character. * </ul> * * <P> * Additional there is the <code>encodeQueryString()</code> method, * which currently is a mere hack. * * @param s <code>String</code> to be translated. * @return the translated <code>String</code>. */ public static String encode(String s) { StringBuffer out = new StringBuffer(3 * s.length()); ByteArrayOutputStream buf = new ByteArrayOutputStream(MAXBYTESPERCHAR); OutputStreamWriter writer = new OutputStreamWriter( buf); for (int i = 0; i < s.length(); i++) { int c = (int)s.charAt(i); if (dontNeedEncoding.get(c)) out.append((char)c); else { // convert to external encoding before hex conversion try { writer.write(c); writer.flush(); } catch (IOException e) { buf.reset(); continue; } byte[] ba = buf.toByteArray(); for (int j = 0; j < ba.length; j++) { out.append('%'); out.append(Character.forDigit((ba[j] >> 4) & 0xF, 16)); out.append(Character.forDigit(ba[j] & 0xF, 16)); } buf.reset(); } } return out.toString(); } /** * Currently a quick hack to encode certain special characters * in a URL's querystring part. * XXX Read the RFC's and do it the right way. */ public static String encodeQueryString(String s) { return s.replace(' ', '+'); } /** * Ugly method for the hopefully most compatible way to * create a URI-String from a local file identifier. * Is similar to java.io.File.toURL in JDK1.2+ */ public static URL toURL(String fname) throws MalformedURLException { File fi = new File(fname); String path = fi.getAbsolutePath(); if (File.separatorChar != '/') path = path.replace(File.separatorChar, '/'); if (!path.startsWith("/")) path = "/" + path; if (!path.endsWith("/") && fi.isDirectory()) path = path + "/"; return new URL("file", "", path); } /** XXX * method to obtain the absolut path of a file-url. * this method is necessary due to a missing * constructor File(URL url) in java.io.File (JDK1.3.1) */ public static String getFileString(URL url) { String proto = url.getProtocol(); if (!"file".equals(proto)) return url.toExternalForm(); else return url.getPath(); } /** * Alternative call with String Argument */ public static String getFileString(String url) { try { return getFileString(new URL(url)); } catch (MalformedURLException e) { // I've seen file:/ and file:// and file:/// // in the real world! // XXX On the other hand in the *X-world any leading // "/" has a meaning, of root, so one should NOT eliminate // everything :-( if (url.startsWith(FILEPROTO)) { int len = FILEPROTO.length(); while (url.charAt(len) == '/' && url.charAt(len + 1) == '/') len++; return url.substring(len); } return url; } } /** * Decode String from <CODE>application/x-www-form-urlencoded</CODE> * to regular Java encoding. */ public static String decode(String s) { try { return decode(s, defaultEncoding); } catch ( UnsupportedEncodingException e ) { return s; } } /** * Decodes a <code>application/x-www-form-urlencoded</code> to regular * Java encoding with specified URL encoding. * * @param s the <code>String</code> to decode * @param enc The name of a supported encoding</a>. * @return the newly decoded <code>String</code> * @exception UnsupportedEncodingException * If the named encoding is not supported */ public static String decode(String s, String enc) throws UnsupportedEncodingException { byte[] bytes = null; boolean changed = false; StringBuffer sb = new StringBuffer(); int numChars = s.length(); int i = 0; while (i < numChars) { char ch = s.charAt(i++); switch (ch) { case '+': sb.append(' '); changed = true; break; case '%': // (numChars-i)/3 is an upper bound for remaining bytes if ( bytes == null ) bytes = new byte[(numChars-i)/3]; int pos = 0; try { while ( ch == '%' && i + 2 <= numChars ) { bytes[pos++] = (byte)Integer.parseInt( s.substring(i,i+2),16); i += 3; if (i > numChars) break; ch = s.charAt(i-1); } sb.append(new String(bytes, 0, pos, enc)); // Incomplete escapes at end and are quietly ignored. if ( ch == '%' && i+2 >= numChars ) sb.append(s.substring(i)); } catch (NumberFormatException e) { // Numberformat problems are quitely ignored! sb.append(new String(bytes, 0, pos, enc)); e.printStackTrace(); System.err.println("*** <"+s+">"); sb.append('%'); } changed = true; break; default: sb.append(ch); break; } } return (changed ? sb.toString() : s); } } // // Jacson - Text Filtering with Java. // Copyright (C) 2002 Frank S. Nestel (nestefan -at- users.sourceforge.net) // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA //
URLTools.java |