package page.example;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.wikiwebserver.core.WareHouse;
import org.wikiwebserver.handler.http.FormData;
import org.wikiwebserver.handler.http.HTTPException;
import org.wikiwebserver.handler.http.HTTPHandler;
import org.wikiwebserver.handler.http.interfaces.CacheableHTTPResponse;
import org.wikiwebserver.handler.http.interfaces.HTTPResponder;

/**
 * Example responder that downloads a remote text document and returns the
 * concatenation of every substring that matches a caller-supplied regular
 * expression.
 *
 * <p>Both inputs come from the request's form data: {@code url} is the
 * document to fetch and {@code re} is the pattern to apply to it.
 *
 * <p>NOTE(review): {@code re} is untrusted and is evaluated by
 * {@code java.util.regex}; a hostile pattern can make matching very slow.
 * The matched text is returned verbatim while the response is declared as
 * {@code text/html}, so remote markup reaches the client unescaped.
 */
public class SiteScraper implements HTTPResponder, CacheableHTTPResponse {

    /** Milliseconds added to the current time by {@link #getExpireTime()}. */
    private static final long DEFAULT_EXPIRE = 60 * 1000;

    /** Charset used when the remote Content-Type names none, or one this JVM lacks. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Extracts the {@code charset} parameter value from a Content-Type header. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*[\"']?([^\\s;\"']+)", Pattern.CASE_INSENSITIVE);

    private String url, re;

    /**
     * Reads the {@code url} and {@code re} form parameters and sets the
     * response Content-Type header.
     *
     * @param conn handler giving access to the current request and response
     * @throws IOException declared by the signature; propagated from the handler calls
     */
    public void init(HTTPHandler conn) throws IOException {
        // Start from a clean slate so values from an earlier call can never
        // leak into a request that carries no form data.
        url = null;
        re = null;

        FormData formData = conn.getRequest().getFormData();
        if (formData != null) {
            url = formData.getFirst("url");
            re = formData.getFirst("re");
        }
        conn.getResponse().getHeaders().set("Content-Type", "text/html; charset=utf-8");
    }

    /**
     * Fetches the configured URL and applies the configured pattern to it.
     *
     * @param conn handler for the current request (not used by this method)
     * @return all matches concatenated in document order, or the string
     *         {@code "No match!"} when the pattern matches nowhere
     * @throws HTTPException if a parameter is missing, the pattern is invalid,
     *         the URL is malformed, not http/https or unreachable, or the
     *         remote content type is not {@code text/*}
     * @throws IOException if reading the remote body fails
     */
    public Object respond(HTTPHandler conn) throws IOException {

        if (url == null || re == null) {
            throw new HTTPException(500, "url and re parameters required");
        }

        // Compile first so a bad pattern is rejected before any network traffic.
        // PatternSyntaxException is an IllegalArgumentException.
        Pattern pattern;
        try {
            pattern = Pattern.compile(re, Pattern.MULTILINE | Pattern.DOTALL);
        } catch (IllegalArgumentException ex) {
            // NOTE(review): 400 would describe this better; 500 is kept to match
            // the status used for the other client errors in this class.
            throw new HTTPException(500, "Invalid regular expression", ex);
        }

        URLConnection remote = openRemote(url);

        String contentType = remote.getContentType();
        if (contentType == null || !contentType.startsWith("text/")) {
            throw new HTTPException(500, "Unsupported content type (" + contentType + ")");
        }

        String page = new String(readBody(remote), charsetOf(contentType));

        Matcher m = pattern.matcher(page);
        if (!m.find()) {
            return "No match!";
        }

        // A successful match may be empty, so "no match" is decided by find()
        // above and not by the length of the accumulated result.
        StringBuilder result = new StringBuilder();
        do {
            result.append(m.group());
        } while (m.find());

        return result.toString();
    }

    /**
     * Returns a key identifying the (url, re) pair of this request.
     *
     * @return the empty string when either parameter is missing (all such
     *         requests fail with the same error), otherwise a key in which
     *         the URL is length-prefixed so that distinct pairs such as
     *         ("ab", "c") and ("a", "bc") cannot produce the same key
     */
    public String getCacheKey() {
        if (url == null || re == null) return "";
        return url.length() + ":" + url + re;
    }

    /**
     * @return the current time in milliseconds plus {@link #DEFAULT_EXPIRE}
     */
    public long getExpireTime() {
        return System.currentTimeMillis() + DEFAULT_EXPIRE;
    }

    /** Parses {@code spec}, allows only http/https, and opens a connected URLConnection. */
    private static URLConnection openRemote(String spec) throws IOException {
        URL target;
        try {
            target = new URL(spec);
        } catch (IOException ex) { // MalformedURLException
            throw new HTTPException(500, "Failed to fetch URL", ex);
        }

        // The URL is user input: refuse file:, jar:, ftp: and the like so this
        // page cannot be used to read resources local to the server.
        String protocol = target.getProtocol();
        if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
            throw new HTTPException(500, "Unsupported URL scheme (" + protocol + ")");
        }

        try {
            URLConnection remote = target.openConnection();
            // openConnection() performs no I/O; connect here so that network
            // failures are reported as such and not as a null content type.
            remote.connect();
            return remote;
        } catch (IOException ex) {
            throw new HTTPException(500, "Failed to fetch URL", ex);
        }
    }

    /** Reads the whole response body, always closing the stream afterwards. */
    private static byte[] readBody(URLConnection remote) throws IOException {
        InputStream in = remote.getInputStream();
        try {
            return WareHouse.streamToByteArray(in);
        } finally {
            // Whether the helper closes the stream is not visible from here;
            // closing an already-closed stream is harmless.
            in.close();
        }
    }

    /** Returns the charset named in {@code contentType} if this JVM supports it, else UTF-8. */
    private static String charsetOf(String contentType) {
        Matcher m = CHARSET_PARAM.matcher(contentType);
        if (m.find()) {
            String name = m.group(1);
            try {
                if (Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name sent by the remote server;
                // fall through to the default.
            }
        }
        return DEFAULT_CHARSET;
    }
}
