/**********************************************************************
 * Class URNUtils
 *  
 * Copyright (c) 2006-2012, German National Library/Deutsche Nationalbibliothek
 * Adickesallee 1, D-60322 Frankfurt am Main, Federal Republic of Germany 
 *
 * This program is free software.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * Kadir Karaca Kocer -- German National Library
 * 
 **********************************************************************/

/* ********************************************************************
 * CHANGELOG:
 * 2012-03-14 Commented and ported to Apache Maven
 * 20111007 - Package refactoring
 * 20110930 - URL correction/normalizing routines
 * 20080315 - getChecksum added. getURN modified.
 * Created on 26.06.2006 16:11:21 by Kadir Karaca Koçer, German National Library
 **********************************************************************/

package org.nbnResolving.common;

import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/** ********************************************************************
 * Class to generate URNs and/or URN-Checksums
 * See the URN documentation and www.persistent-identifier.de
 * Some URL normalizing utility functions are also implemented.
 *
 * @author  Kadir Karaca Koçer, German National Library
 * @version 20111007
 * @see java.net.URI
 * @see java.net.URL
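 * @see <a href="http://tools.ietf.org/html/rfc2141">RFC 2141 - URN Syntax</a>
 * @see <a href="http://tools.ietf.org/html/rfc3188">RFC 3188 - Using National Bibliography Numbers as Uniform Resource Names</a>
 * @see <a href="http://tools.ietf.org/html/rfc3986">RFC 3986 - Uniform Resource Identifier (URI): Generic Syntax</a>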
 * @since 20060626
 **********************************************************************/
 
public class URNUtils
{
    // Logger shared by all static helpers of this class.
    private static final Log LOGGER = LogFactory.getLog(URNUtils.class);
    
	//CONSTANTS  **********************************************************
	// Characters accepted by getChecksum(). The character at index i is
	// translated into the digit string stored at index i of "values".
	private static final char[]   chars  = {
		                           '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
			                       'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
			                       'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
			                       'u', 'v', 'w', 'x', 'y', 'z', '-', ':', '_', '/',
			                       '.', '+'};
	
	// Digit strings substituted for the characters in "chars" (same index).
	// None of the entries contains the digit 0; getChecksum() divides by the
	// last digit of the concatenated sequence.
	private static final String[] values = {
		                             "1",  "2",  "3",  "4",  "5",  "6",  "7",  "8",  "9", "41",
			                        "18", "14", "19", "15", "16", "21", "22", "23", "24", "25",
			                        "42", "26", "27", "13", "28", "29", "31", "12", "32", "33",
			                        "11", "34", "35", "36", "37", "38", "39", "17", "43", "45",
			                        "47", "49"};
	
	// Character encoding handed to java.net.URLEncoder / java.net.URLDecoder.
	private static final String ENCODING = "UTF-8";
	// Timestamp pattern (year ... milliseconds) used by getDateURN().
	// NOTE(review): java.text.SimpleDateFormat instances are not thread-safe;
	// concurrent use of this shared instance needs external synchronization.
	private static final java.text.SimpleDateFormat dateFormatter = new java.text.SimpleDateFormat("yyyyMMddHHmmssSSS");
	
	/**
	 * Calculates the checksum digit of the given URN.
	 * <p>
	 * The URN is trimmed and converted to lower case (locale independent).
	 * Every character is then replaced by its digit string from the
	 * {@code values} table. Each digit of the resulting sequence is multiplied
	 * by its 1-based position and the products are summed up. The sum is
	 * divided (integer division) by the last digit of the sequence and the
	 * quotient modulo 10 is the checksum.
	 * 
	 * @param tmpurn URN without checksum
	 * @return Checksum of URN (0-9) or -1 if the URN is {@code null}, blank
	 *         or contains a character that is not allowed in an URN
	 */
	public static int getChecksum(String tmpurn){
		if (tmpurn == null) {
			LOGGER.error("URN is null! Returning -1");
			return -1;
		}
		//Locale.ROOT: the default locale must not influence the mapping
		//(e.g. a Turkish locale would convert 'I' to a dotless 'i')
		String urn = tmpurn.trim().toLowerCase(java.util.Locale.ROOT);
		if (urn.isEmpty()) {
			//without this guard the "last digit" lookup below would fail
			LOGGER.error("URN is empty! Returning -1");
			return -1;
		}

		//build the checksum string
		StringBuilder digits = new StringBuilder(urn.length() * 2);
		for (int i = 0; i < urn.length(); i++) {
			char c = urn.charAt(i);
			String mapped = null;
			for (int j = 0; j < chars.length; j++) {
				if (c == chars[j]) {     //we found the searched character
					mapped = values[j];
					break;
				}
			}
			if (mapped == null) { //invalid character. See URN documentation
				LOGGER.error("Invalid Character! Returning -1");
				return -1;
			}
			digits.append(mapped);
		}

		//calculate the checksum: sum of (digit * position)
		int sum = 0;
		for (int i = 0; i < digits.length(); i++) {
			int digit = digits.charAt(i) - '0';
			if (LOGGER.isDebugEnabled()) LOGGER.debug(i + ". Substring = " + digit);
			sum = sum + (digit * (i + 1));
		}
		if (LOGGER.isDebugEnabled()) LOGGER.debug("SUM = " + sum);

		//the values table contains no '0', so the divisor is never zero
		int last = digits.charAt(digits.length() - 1) - '0';
		return (sum / last) % 10;
	}
	
	/**
	 * Returns a valid URN with checksum for the given identifier without checksum.
	 * <p>
	 * Leading and trailing whitespace is removed before the checksum is
	 * appended, because {@link #getChecksum(String)} calculates the checksum
	 * on the trimmed string as well. The letter case of the input is kept.
	 * 
	 * @param tmpurn URN without checksum
	 * @return URN with checksum added, or {@code null} if the input is
	 *         {@code null}, blank or contains an illegal character
	 */
	public static String getURN(String tmpurn) {
		if (tmpurn == null) return null;
		String urn = tmpurn.trim();
		//nothing to calculate a checksum for
		if (urn.isEmpty()) return null;

		int checksum = getChecksum(urn);
		//if illegal character return null
		if (checksum < 0) return null;
		//else URN_WITHOUT_CHECKSUM + CHECKSUM
		return urn + checksum;
	}
	
	/**
	 * Builds an URN from a namespace and an identifier and adds the checksum.
	 * The result has the form {@code <ns>-<id>-<checksum>}; a namespace that
	 * already ends with "-" does not get a second separator.
	 * 
	 * @param ns Namespace. Must be longer than 13 characters (urn:nbn:**:*-)
	 * @param id Another unique number
	 * @return URN with checksum added or {@code null} if the namespace is
	 *         {@code null} or too short
	 */
	public static String getURNforId(String ns, String id){
		//guard: namespace can not be null and must be longer than urn:nbn:**:*-
		if ((ns == null) || (ns.length() <= 13)) {
			return null;
		}
		String prefix = ns.endsWith("-") ? ns : ns + "-";
		return getURN(prefix + id + "-");
	}
	
	/**
	 * Generates an URN for the given namespace whose identifier is derived
	 * from the current time, a random number and the id of the calling thread.
	 * The identifier has the form {@code ddddd.ddddd.rrrrtt}: ten digits taken
	 * from the system clock, a four digit random number (1000-9999) and a two
	 * digit thread component (10-99).
	 * 
	 * @param ns Namespace for this URN.
	 * @return URN with checksum added
	 */
	public static String getRandomURN(String ns) {
		//time component: always a ten digit number
		String clock = Long.toString((System.currentTimeMillis() % Integer.MAX_VALUE) + 1000000000);
		java.util.Random generator = new java.util.Random();
		int randomPart = (int) (generator.nextFloat() * 9000) + 1000;
		int threadPart = (int) (Thread.currentThread().getId() % 90) + 10;

		//dots are inserted for better human readability
		StringBuilder id = new StringBuilder();
		id.append(clock.substring(0, 5)).append(".");
		id.append(clock.substring(5)).append(".");
		id.append(randomPart).append(threadPart);

		//namespace + id --> URN including checksum
		return getURNforId(ns, id.toString());
	}
    
    /**
     * Returns a valid URN for the given Namespace beginning with actual date.
     * The identifier consists of the current timestamp (pattern
     * {@code yyyyMMddHHmmssSSS}), a "-", a three digit thread component
     * (100-999) and a four digit random number (1000-9999).
     * 
     * @param ns Namespace for this URN.
     * @return URN with checksum added
     */
    public static String getDateURN(String ns) {
        String id;
        //SimpleDateFormat is not thread-safe: the shared formatter must not
        //be used by two threads at the same time
        synchronized (dateFormatter) {
            id = dateFormatter.format(new java.util.Date());
        }
        java.util.Random rnd = new java.util.Random();
        int rand = (int) (rnd.nextFloat() * 9000) + 1000;
        int tid  = (int) (Thread.currentThread().getId() % 900) + 100;
        //format the string for better human readability
        id = id + "-" + tid + rand;
        //send Namespace and Id and return checksum calculated URN 
        return getURNforId(ns, id);
    }

	/* ********* URL Utils ****************************** */
	
	/**
	 * Joins the entries of a map to a string of the form
	 * {@code key1=value1<delimiter>key2=value2...}. Entries with a
	 * {@code null} or empty key are skipped; for a {@code null} or empty
	 * value only the key is written (without "="). Nothing is URL-encoded
	 * here, use mapToEncodedString() for that.
	 * 
	 * @param map A map containing Key, Value pairs
	 * @param delimiter String between Key=Value pairs. (normally "&" or ";" for URLs).
	 * @return String representation of those Key, Value pairs. {@code null} if
	 *         the map is {@code null}, empty string if the map is empty.
	 */
	public static String mapToString (java.util.AbstractMap<String, String> map, String delimiter) {
		if (map == null) return null;

		StringBuilder joined = new StringBuilder();
		for (java.util.Map.Entry<String, String> entry : map.entrySet()) {
			String name = entry.getKey();
			if ((name == null) || name.isEmpty()) {
				continue; //unusable key
			}
			if (joined.length() > 0) {
				joined.append(delimiter);
			}
			joined.append(name);

			String content = entry.getValue();
			if ((content != null) && !content.isEmpty()) {
				joined.append("=").append(content);
			}
		}
		return joined.toString();
	}
	
	/**
	 * Iterates all elements of a map and returns key=value pairs separated with
	 * given delimiter as URLEncoded string. Keys and values are passed through
	 * {@link #normalize(String, boolean)}. Entries with a {@code null} or empty
	 * key are skipped; for a {@code null} or empty value only the key is written.
	 *  
	 * @param map A map containing Key, Value pairs
	 * @param delimiter String between Key=Value pairs. "&" or ";" for URIs.
	 * @return String representation of those Key, Value pairs. {@code null} if
	 *         the map is {@code null}, empty string if the map is empty.
	 * @throws UnsupportedEncodingException if the encoding used by normalize() is not supported
	 * @see java.net.URLEncoder
	 */
	public static String mapToEncodedString (java.util.AbstractMap<String, String> map, String delimiter) throws UnsupportedEncodingException {
		if (map == null) return null;

		StringBuilder encoded = new StringBuilder();
		for (java.util.Map.Entry<String, String> entry : map.entrySet()) {
			String key = entry.getKey();
			if ((key == null) || key.isEmpty()) continue;

			//the value is taken from the entry itself: a lookup with the
			//normalized key fails as soon as normalizing changes the key
			String value = entry.getValue();

			if (encoded.length() > 0) encoded.append(delimiter);
			encoded.append(normalize(key, false));
			if ((value != null) && (!value.isEmpty())) {
				encoded.append("=").append(normalize(value, false));
			}
		}
		return encoded.toString();
	}

	/**
	 * Parses a URL query and returns key, value pairs as a TreeMap.
	 * The query is split with the given regular expression; every part is
	 * split at its <b>first</b> "=" into key and value, so a value may itself
	 * contain "=" characters. Keys and values are passed through
	 * {@link #normalize(String, boolean)}. Parts without a key are ignored,
	 * a missing or empty value is stored as {@code null}.
	 * 
	 * @param query (URI) Query to parse
	 * @param regex Regular expression to use for splitting.
	 * @return Key, Value pairs as Map or {@code null} if the query is {@code null} or empty
	 * @throws UnsupportedEncodingException if the encoding used by normalize() is not supported
	 */
	public static java.util.TreeMap<String,String> getParameterMap (String query, String regex) throws UnsupportedEncodingException {
		if ((query == null) || query.isEmpty()) return null;

		java.util.TreeMap<String,String> map = new java.util.TreeMap<String,String>();
		//split all key=value pairs
		for (String pair : query.split(regex)) {
			if ((pair == null) || pair.isEmpty()) continue;

			//limit 2: everything after the first "=" belongs to the value
			String[] kva = pair.split("=", 2);
			if ((kva.length == 0) || kva[0].isEmpty()) continue; //no key

			String key = normalize(kva[0], false);
			String value = null;
			if ((kva.length > 1) && (!kva[1].isEmpty())) {
				value = normalize(kva[1], false);
			}
			map.put(key, value);
		}
		return map;
	}
	
	/**
	 * Extracts the query of the given URL and returns its key|value pairs
	 * as a TreeMap. Both "&" and ";" are accepted as separators.
	 * 
	 * @param url URL to parse
	 * @return Key, Value pairs as Map, {@code null} if the URL is {@code null}
	 * @throws UnsupportedEncodingException 
	 */
	public static java.util.TreeMap<String,String> getParameterMap (java.net.URL url) throws UnsupportedEncodingException {
		return (url == null) ? null : getParameterMap(url.getQuery(), "[&;]");
	}

	/**
	 * Merges two maps into one map that contains the key|value pairs of both.
	 * Please keep in mind:
	 * <ul>
	 * <li> if one of the maps is {@code null} or empty, the other one is returned
	 * unchanged (no copy, no normalizing)</li>
	 * <li> otherwise all keys and values are normalized with the help of
	 * java.net.URLEncoder routines and the result is a new, sorted map</li>
	 * <li> a key of the second map that is already present (after normalizing)
	 * is <b>ignored</b>: the value of the first map wins</li>
	 * </ul>
	 * 
	 * @param map1 First map.
	 * @param map2 Second map.
	 * @return A Java map containing all elements of two given maps.
	 * @see java.util.TreeMap
	 * @see java.net.URLDecoder
	 * @see java.net.URLEncoder
	 * @throws UnsupportedEncodingException
	 */
	public static java.util.AbstractMap<String,String> addMaps(java.util.AbstractMap<String, String> map1,
			      java.util.AbstractMap<String, String> map2) throws UnsupportedEncodingException {
		//trivial cases: at most one map has content -> hand back the other one
		if ((map1 == null) || map1.isEmpty()) return map2;
		if ((map2 == null) || map2.isEmpty()) return map1;

		java.util.TreeMap<String,String> merged = new java.util.TreeMap<String,String>();

		//first map: every usable entry is taken over in normalized form
		for (java.util.Map.Entry<String, String> entry : map1.entrySet()) {
			String rawKey = entry.getKey();
			if ((rawKey == null) || rawKey.isEmpty()) continue;

			String content = entry.getValue();
			if ((content != null) && !content.isEmpty()) {
				content = normalize(content, false);
			}
			merged.put(normalize(rawKey, false), content);
		}

		//second map: only entries whose normalized key is still unknown
		for (java.util.Map.Entry<String, String> entry : map2.entrySet()) {
			String rawKey = entry.getKey();
			if ((rawKey == null) || rawKey.isEmpty()) continue;

			String normalizedKey = normalize(rawKey, false);
			if (merged.containsKey(normalizedKey)) continue;

			String content = entry.getValue();
			if ((content != null) && !content.isEmpty()) {
				content = normalize(content, false);
			}
			//the normalized key is stored, NOT the original one
			merged.put(normalizedKey, content);
		}

		return merged;
	}

	/**
	 * Normalises the given URL path segment by segment: the path is split at
	 * "/", empty segments are dropped and every remaining segment is passed
	 * through {@link #normalize(String, boolean)} and prefixed with "/".
	 * A {@code null} path or a path of at most one character gives an empty string.
	 * 
	 * @param path URL Path to parse.
	 * @see java.net.URLEncoder
	 * @see <a href="http://www.w3.org/TR/html401">HTML 4.01 Specification</a>
	 * @return Path as normalised and encoded string.
	 * @throws UnsupportedEncodingException 
	 */
	public static String getNormalisedPath(String path) throws UnsupportedEncodingException {
		if ((path == null) || (path.length() <= 1)) {
			return "";
		}
		StringBuilder normalised = new StringBuilder();
		for (String segment : path.split("/")) {
			if (!segment.isEmpty()) {
				normalised.append("/").append(normalize(segment, false));
			}
		}
		return normalised.toString();
	}

	/**
	 * Corrects wrong converted or encoded strings.
	 * This version only repairs UTF-8 byte pairs that were wrongly read as two
	 * Latin-1 (ISO-8859-1) characters: every such pair is replaced by the single
	 * character it stands for. Handled are the code points U+00A1 to U+00BF
	 * (bytes C2 A1 - C2 BF) and U+00C0 to U+00FF (bytes C3 80 - C3 BF).
	 * <p>
	 * NOTE(review): U+00A0 (bytes C2 A0) is not handled, exactly as in the
	 * previous implementation -- confirm whether this is intended.
	 * 
	 * @param strToCorrect String to check.
	 * @return Corrected String, {@code null} if the input is {@code null}.
	 * @throws UnsupportedEncodingException if ISO-8859-1 is not supported by the platform
	 */
	public static String correctUtf8error(String strToCorrect) throws UnsupportedEncodingException {
		if (strToCorrect == null) return null; //nothing to correct

		String corrected = strToCorrect;

		//first part of the Latin-1 Unicode Table: lead byte C2, trail byte == code point
		for (int cp = 0x00A1; cp <= 0x00BF; cp++) {
			corrected = replaceMisdecodedPair(corrected, (byte) 0xC2, (byte) cp, cp);
		}

		//second part of the Latin-1 Unicode Table: lead byte C3, trail byte == code point - 0x40
		for (int cp = 0x00C0; cp <= 0x00FF; cp++) {
			corrected = replaceMisdecodedPair(corrected, (byte) 0xC3, (byte) (cp - 0x40), cp);
		}

		if (!corrected.equals(strToCorrect)) {
		    LOGGER.info("\nInput String.: " + strToCorrect + "\nOutput String: " + corrected);
		}

		return corrected;
	}

	/**
	 * Replaces every occurrence of one UTF-8 byte pair, read as Latin-1, by the intended character.
	 */
	private static String replaceMisdecodedPair(String text, byte lead, byte trail, int codePoint) throws UnsupportedEncodingException {
		String str = new String(new byte[] {lead, trail}, "ISO-8859-1"); //the wrong two character form
		String ucs = new String(new int[] {codePoint}, 0, 1);            //unicode String
		if (LOGGER.isDebugEnabled()) LOGGER.debug("Calculated -> " + lead + " " + trail + "  str: " + str + " ucs: " + ucs);
		//literal replacement: no regular expression needed
		return text.replace(str, ucs);
	}
	

	/**
	 * Tries to return a RFC compatible, normalized URL as string from a given string.
	 * The string is trimmed, "%25" is replaced by "%" (repairs double percent
	 * encoding), the result is URL-decoded, parsed as {@link java.net.URL} and
	 * rebuilt from its components with
	 * {@link #buildUrl(String, String, String, int, String, java.util.AbstractMap, boolean, String, boolean)}.
	 * Any exception on the way is logged (including its stack trace) and
	 * leads to a {@code null} result.
	 * 
	 * @param urlStr URL to correct as string.
	 * @param correctEncoding Try to correct common wrong Latin1 - UTF-8 conversions.
	 * @param checkJavaURI Check if Java can parse this string as Java URL and URI without exception
	 * @return Corrected URL as string. {@code null} if the input is {@code null},
	 *         shorter than 4 characters or can not be processed.
	 * @see <a href="http://www.w3.org/TR/html401">HTML 4.01 Specification</a>
	 */
	public static String correctUrlString(String urlStr, boolean correctEncoding, boolean checkJavaURI) {
		if ((urlStr == null) || (urlStr.length() <= 3)) return null; //or exception?

		//in the case of percent-encoding an already percent-encoded string.
		String ret = urlStr.trim().replaceAll("%25", "%");

		try {
			//see RFC: http://greenbytes.de/tech/webdav/rfc3986.html#comparison-ladder
			//Percent-Encoding Normalization
			ret = java.net.URLDecoder.decode(ret, ENCODING);
			//UTF-8 <--> Latin-1 problems
			if (correctEncoding) ret = correctUtf8error(ret);

			java.net.URL url = new java.net.URL(ret);
			java.util.TreeMap<String,String> params = getParameterMap(url);
			return buildUrl(url.getProtocol(), url.getUserInfo(), url.getHost(), url.getPort(),
					url.getPath(), params, false, url.getRef(), checkJavaURI);
		} catch (Exception e) {
			//best effort: report the problem through the logger (with cause) instead of stderr
			LOGGER.error("ERROR! Exception. Original URL: " + urlStr + "\ncorrected URL: " + ret, e);
			return null;
		}
	}


	/**
	 * Adds given new query to existing URL.
	 * The additional query is split at its <b>first</b> "=" into key and
	 * value, so the value may itself contain "=" characters. If the URL
	 * already has a parameter with the same (normalized) key, the original
	 * URL is returned unchanged. If the additional query has no key, the
	 * percent-decoded URL is returned.
	 * 
	 * @param urlStr URL to correct as string.
	 * @param aq Additional query: query to add to given URL.
	 * @see <a href="http://www.w3.org/TR/html401">HTML 4.01 Specification</a>
	 * @return Corrected URL as string. {@code null} if the URL is {@code null}
	 *         or shorter than 4 characters.
	 * @throws MalformedURLException if the URL can not be parsed
	 * @throws UnsupportedEncodingException if the encoding is not supported
	 * @throws URISyntaxException if the rebuilt URL is not a valid URI
	 */
	public static String addQueryToUrl(String urlStr, String aq) throws MalformedURLException, UnsupportedEncodingException, URISyntaxException {
		if ((urlStr == null) || (urlStr.length() < 4)) return null; //definitely malformed URL
		if ((aq == null) || aq.isEmpty()) return urlStr;            //nothing to add

		String ret = urlStr.trim().replaceAll("%25", "%");
		ret = java.net.URLDecoder.decode(ret, ENCODING);

		//limit 2: everything after the first "=" belongs to the value
		String[] kva = aq.split("=", 2);
		if ((kva.length == 0) || kva[0].isEmpty()) return ret; //no key to add

		String key = normalize(kva[0], false);

		java.net.URL url = new java.net.URL(ret);
		java.util.TreeMap<String,String> map = getParameterMap(url);
		if (map == null) {
			map = new java.util.TreeMap<String,String>();
		} else if (map.containsKey(key)) {
			LOGGER.debug("addQueryToUrl(): key already exists --> returning the original URL");
			return urlStr;  //key already exists --> abort
		}

		String value = null;
		if ((kva.length > 1) && (!kva[1].isEmpty())) {
			value = normalize(kva[1], false);
		}
		map.put(key, value);

		return buildUrl(url.getProtocol(), url.getUserInfo(), url.getHost(), url.getPort(),
				getNormalisedPath(url.getPath()), map, false, url.getRef(), true);
	}

	/**
	 * Merges the given parameters into the query of an existing URL.
	 * Parameters that are already part of the URL keep their value
	 * (see {@link #addMaps(java.util.AbstractMap, java.util.AbstractMap)}).
	 * 
	 * @param urlStr URL as java string.
	 * @param params New parameters to add.
	 * @return URL as string containing the given key|value pairs in map as query parameters.
	 *         {@code null} if the URL is {@code null} or shorter than 4 characters,
	 *         the unchanged URL if there are no parameters to add.
	 * @throws UnsupportedEncodingException
	 * @throws MalformedURLException
	 * @throws URISyntaxException
	 */
	public static String addParamsToUrl(String urlStr, java.util.AbstractMap<String,String> params) throws UnsupportedEncodingException, MalformedURLException, URISyntaxException {
		if ((urlStr == null) || (urlStr.length() < 4)) {
			return null; //definitely malformed URL
		}
		if ((params == null) || params.isEmpty()) {
			return urlStr; //nothing to add
		}

		//undo a possible double percent-encoding, then decode
		String decoded = java.net.URLDecoder.decode(urlStr.trim().replaceAll("%25", "%"), ENCODING);
		java.net.URL parsed = new java.net.URL(decoded);

		//double encoding!!! but you have to be sure :-(
		String normalisedPath = getNormalisedPath(parsed.getPath());
		java.util.AbstractMap<String,String> allParams = addMaps(getParameterMap(parsed), params);
		return buildUrl(parsed.getProtocol(), parsed.getUserInfo(), parsed.getHost(), parsed.getPort(),
				normalisedPath, allParams, false, parsed.getRef(), false);
	}
	
		
	/**
	 * Builds an URI from its given components.
	 * <ul>
	 * <li>protocol and host are trimmed and converted to lower case
	 * (locale independent); "http" is used if no protocol is given</li>
	 * <li>user info is only added if it is longer than 2 characters</li>
	 * <li>the port is only added if it is greater than 0</li>
	 * <li>the path is normalised with {@link #getNormalisedPath(String)}</li>
	 * <li>the query always starts with "?&amp;" (workaround, see code)</li>
	 * </ul>
	 * 
	 * @param protocol Protocol part of this URI
	 * @param userInfo User info part of this URI
	 * @param host Host part of this URI. Must be longer than 3 characters.
	 * @param port Port part of this URI
	 * @param path Path part of this URI
	 * @param params Parameters part of this URI
	 * @param encodeParams TRUE if the parameters must be encoded
	 * @param anchor Anchor part of this URI
	 * @param checkJavaURI TRUE if extra validation wanted
	 * 
	 * @return A valid URL (correct encoded) as string. 
	 * @throws IllegalArgumentException if the host is {@code null} or too short
	 * @throws UnsupportedEncodingException if the encoding is not supported
	 * @throws URISyntaxException if checkJavaURI is set and the result is not a valid URI
	 * @throws MalformedURLException if checkJavaURI is set and the result is not a valid URL
	 */
	public static String buildUrl (String protocol, String userInfo, String host, int port, String path,
			                      java.util.AbstractMap<String, String> params, boolean encodeParams,
			                      String anchor, boolean checkJavaURI)
	                      throws UnsupportedEncodingException, URISyntaxException, MalformedURLException {
		//Case Normalization
		//Path Segment Normalization --> we do not need it, because we do not have relative URIs
		//Scheme-Based Normalization
		StringBuilder sb = new StringBuilder();

		//Locale.ROOT: scheme and host are ASCII identifiers, their lower case
		//form must not depend on the default locale of the JVM
		if ((protocol != null) && (protocol.length() > 1)) {
			sb.append(protocol.trim().toLowerCase(java.util.Locale.ROOT)).append("://");
		} else {
			sb.append("http://");  //HTTP is default
		}

		if ((userInfo != null) && (userInfo.length() > 2)) {
			sb.append(userInfo.trim()).append("@");
		}

		if ((host == null) || (host.length() <= 3)) {
			throw new IllegalArgumentException("No host Name!");
		}
		sb.append(host.trim().toLowerCase(java.util.Locale.ROOT));

		if (port > 0) sb.append(":").append(port);

		sb.append(getNormalisedPath(path));

		if ((params != null) && (!params.isEmpty())) {
			//WORKAROUND: PDF-Viewer needs a "&" at the beginning. Hope it is "legal" (Karaca)
			sb.append("?&");
			sb.append(encodeParams ? mapToEncodedString(params, "&") : mapToString(params, "&"));
		}

		if ((anchor != null) && (!anchor.isEmpty())) {
			sb.append("#").append(normalize(anchor, false));
		}

		String norm = sb.toString();
		if (LOGGER.isDebugEnabled()) LOGGER.debug("Corrected URL:\n" + norm);

		if (checkJavaURI) {
			//just test if it passes the Java intern consistency check --for paranoids :-)
			java.net.URL url = new java.net.URL(norm);
			url.toURI();
			norm = url.toExternalForm();
		}

		return norm;
	}

	/**
	 * Normalizes the "Percent Encoding" of an URI component.
	 * The string is trimmed, optionally converted to lower case (locale
	 * independent), "%25" is replaced by "%" (repairs double percent
	 * encoding), then the string is URL-decoded and URL-encoded again.
	 * Finally "%7E" and "%2C" are turned back into "~" and ",".
	 * 
	 * @param str The URI to normalize as Java String
	 * @param lowerCase TRUE if the string should be converted to all lower case.
	 * @return URL as normalized string, {@code null} if the input is {@code null}.
	 * @throws UnsupportedEncodingException if the encoding is not supported
	 */
	public static String normalize(String str, boolean lowerCase) throws UnsupportedEncodingException {
		if (str == null) return null;
		String ret = str.trim();
		//Locale.ROOT: the result must not depend on the default locale of the JVM
		if (lowerCase) ret = ret.toLowerCase(java.util.Locale.ROOT);
		//literal replacements, no regular expressions needed
		ret = ret.replace("%25", "%");
		ret = java.net.URLDecoder.decode(ret, ENCODING);
		ret = java.net.URLEncoder.encode(ret, ENCODING);
		ret = ret.replace("%7E", "~");
		ret = ret.replace("%2C", ",");
		return ret;
	}
}
