package com.alimama.mdrill.ui.service;
import javax.servlet.jsp.JspWriter;
import org.apache.log4j.Logger;
import com.alimama.mdrill.json.JSONArray;
import com.alimama.mdrill.json.JSONException;
import com.alimama.mdrill.json.JSONObject;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import weka.core.Instances;
import weka.classifiers.functions.GaussianProcesses;
import weka.classifiers.evaluation.NumericPrediction;
import weka.classifiers.timeseries.WekaForecaster;
import java.io.*;
/**
 * Builds chart data for a per-day time series and extends it with forecast
 * points computed by Weka's {@link WekaForecaster} over a
 * {@link GaussianProcesses} base learner.
 *
 * <p>All members are static. No state is shared between calls, so the class
 * itself adds no thread-safety constraints beyond those of its collaborators.
 */
public class TimeSeries {
    private static final Logger LOG = Logger.getLogger(TimeSeries.class);

    /** Pattern of the {@code thedate} values, also declared in the ARFF header. */
    private static final String DATE_PATTERN = "yyyyMMdd";

    /** Name of the field that carries the time stamp of each row. */
    private static final String DATE_FIELD = "thedate";

    /** Suffix of the key under which the forecast line of a field is stored. */
    private static final String FORECAST_KEY_SUFFIX = "_pre";

    private static final long MILLIS_PER_DAY = 1000L * 3600 * 24;

    /**
     * Creates a date format for {@link #DATE_PATTERN}.
     *
     * <p>{@link SimpleDateFormat} is not thread-safe, so a fresh instance is
     * handed out instead of sharing one static instance across requests.
     */
    private static SimpleDateFormat newDateFormat() {
        return new SimpleDateFormat(DATE_PATTERN);
    }

    /**
     * Tells whether a field is treated as a numeric series: its name contains
     * both an opening and a closing parenthesis (for example {@code sum(x)}).
     */
    private static boolean isNumericField(String field) {
        return field.indexOf("(") >= 0 && field.indexOf(")") >= 0;
    }

    /** Returns the point list stored under the {@code "data"} key of a line. */
    @SuppressWarnings("unchecked") // "data" is only ever populated by setUpLines
    private static ArrayList<ArrayList<Object>> dataOf(HashMap<String, Object> line) {
        return (ArrayList<ArrayList<Object>>) line.get("data");
    }

    /**
     * Collects the union of the keys of every object in {@code list}.
     *
     * @param list array of JSON objects
     * @return the distinct keys, in no particular order
     * @throws JSONException if an element of {@code list} is not a JSON object
     */
    private static String[] getAllFields(JSONArray list) throws JSONException {
        HashSet<String> names = new HashSet<String>();
        for (int i = 0; i < list.length(); i++) {
            Iterator<?> keys = list.getJSONObject(i).keys();
            while (keys.hasNext()) {
                names.add((String) keys.next());
            }
        }
        return names.toArray(new String[names.size()]);
    }

    /**
     * Maps every numeric field of {@code fl} to its display name.
     *
     * <p>The field at position {@code i} of {@code fl} is paired with entry
     * {@code i - 1} of {@code dimvalue}; a numeric field without a matching
     * entry is mapped to its own name.
     *
     * @param fl       comma-separated field list; {@code null} yields an empty map
     * @param dimvalue comma-separated display names (spaces, tabs and one
     *                 leading comma are stripped); {@code null} means none
     * @return map from numeric field name to display name
     */
    private static HashMap<String, String> fieldToShowName(String fl, String dimvalue) {
        HashMap<String, String> field2dimvalue = new HashMap<String, String>();
        if (fl == null) {
            return field2dimvalue;
        }
        String[] requested = fl.split(",");
        String[] dimvalues = dimvalue == null
                ? new String[0]
                : dimvalue.replaceAll(" ", "").replaceAll("\t", "").replaceAll("^,", "").split(",");
        for (int i = 0; i < requested.length; i++) {
            String f = requested[i];
            if (!isNumericField(f)) {
                continue;
            }
            boolean hasName = i > 0 && (i - 1) < dimvalues.length;
            field2dimvalue.put(f, hasName ? dimvalues[i - 1] : f);
        }
        return field2dimvalue;
    }

    /**
     * Creates the empty chart lines for every numeric field.
     *
     * <p>Each numeric field {@code f} gets two entries: {@code f} for the
     * observed values and {@code f + "_pre"} for the forecast. Every entry is a
     * map with the keys {@code "Y"}, {@code "label"} and {@code "data"}.
     *
     * @param numberfield    receives the comma-joined names of the numeric fields
     * @param fields         all field names
     * @param isSingleY      when true every line uses {@code "Y"} as its
     *                       {@code "Y"} value, otherwise the field name
     * @param field2dimvalue display names by field; a field without an entry is
     *                       labelled with its own name
     * @return the lines keyed by field name and field name plus {@code "_pre"}
     */
    private static HashMap<String, HashMap<String, Object>> setUpLines(StringBuilder numberfield, String[] fields, boolean isSingleY, HashMap<String, String> field2dimvalue) {
        HashMap<String, HashMap<String, Object>> lines = new HashMap<String, HashMap<String, Object>>();
        String join = "";
        for (String f : fields) {
            if (!isNumericField(f)) {
                continue;
            }
            numberfield.append(join).append(f);
            join = ",";

            String yAxis = isSingleY ? "Y" : f;
            String label = field2dimvalue.get(f);
            if (label == null) {
                // Avoid a null label and a literal "null_..." forecast label.
                label = f;
            }

            HashMap<String, Object> observed = new HashMap<String, Object>();
            observed.put("Y", yAxis);
            observed.put("label", label);
            observed.put("data", new ArrayList<ArrayList<Object>>());
            lines.put(f, observed);

            HashMap<String, Object> forecast = new HashMap<String, Object>();
            forecast.put("Y", yAxis);
            forecast.put("label", label + "_预测");
            forecast.put("data", new ArrayList<ArrayList<Object>>());
            lines.put(f + FORECAST_KEY_SUFFIX, forecast);
        }
        return lines;
    }

    /**
     * Writes an ARFF header for relation {@code mdrill} followed by the
     * {@code @data} marker.
     *
     * <p>Numeric fields are declared {@code numeric}, {@code thedate} is
     * declared as a {@code yyyyMMdd} date and every other field as
     * {@code string}.
     *
     * @param ramwriter destination; not closed by this method
     * @param fields    attribute names, in column order
     * @throws IOException if writing fails
     */
    public static void writeArrfHeader(OutputStreamWriter ramwriter, String[] fields) throws IOException {
        ramwriter.append("% Sales of Australian wine (thousands of litres)\r\n");
        ramwriter.append("% from Jan 1980 - 1980-07-01 1995. Data is sorted in\r\n");
        ramwriter.append("% time\r\n");
        ramwriter.append("@relation mdrill\r\n");
        ramwriter.append("\r\n");
        for (String f : fields) {
            if (isNumericField(f)) {
                ramwriter.append("@attribute " + f + " numeric\r\n");
            } else if (f.equals(DATE_FIELD)) {
                ramwriter.append("@attribute " + f + " date 'yyyyMMdd'\r\n");
            } else {
                ramwriter.append("@attribute " + f + " string\r\n");
            }
        }
        ramwriter.append("\r\n");
        ramwriter.append("@data\r\n");
    }

    /** Running minimum and maximum of the time stamps (epoch millis) put on the chart. */
    public static class minMaxX {
        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
    }

    /**
     * Folds one field value into the running maximum {@code thedate}.
     *
     * @param f          name of the field {@code val} belongs to
     * @param maxThedate maximum seen so far, or {@code null}
     * @param val        the field value
     * @return the larger of {@code maxThedate} and {@code val} (compared as
     *         strings) when {@code f} is {@code thedate}, otherwise
     *         {@code maxThedate} unchanged
     */
    private static String maxThedate(String f, String maxThedate, Object val) {
        if (!f.equals(DATE_FIELD)) {
            return maxThedate;
        }
        String candidate = String.valueOf(val);
        if (maxThedate == null || maxThedate.compareTo(candidate) <= 0) {
            return candidate;
        }
        return maxThedate;
    }

    /**
     * Trains a forecaster on the first {@code end} rows of {@code list} and
     * appends {@code presize} forecast points per numeric field to
     * {@code lines}.
     *
     * <p>The rows are serialised to an in-memory ARFF document, loaded as Weka
     * {@link Instances} and fed to a {@link WekaForecaster}. Forecast points go
     * to the {@code field + "_pre"} lines; when {@code writeline} is true the
     * observed points are also appended to the {@code field} lines. Each point
     * is a two-element list {@code [epochMillis, value]}.
     *
     * @param lines       chart lines as produced by {@code setUpLines}
     * @param writeline   whether observed values are appended to {@code lines}
     * @param end         number of leading rows of {@code list} to train on
     * @param presize     number of steps to forecast
     * @param minmax      updated with the time stamps of every appended point
     * @param fields      attribute names, in ARFF column order; every row must
     *                    contain each of them
     * @param list        the rows, one JSON object per date
     * @param numberfield comma-joined names of the fields to forecast
     * @throws Exception if a row lacks a field, a date cannot be parsed, or
     *                   Weka fails to load the data or build the forecaster
     */
    public static void makePre(HashMap<String, HashMap<String, Object>> lines, boolean writeline, int end, int presize, minMaxX minmax, String[] fields, JSONArray list, StringBuilder numberfield) throws Exception {
        SimpleDateFormat fmt = newDateFormat();
        ByteArrayOutputStream ramout = new ByteArrayOutputStream();
        OutputStreamWriter ramwriter = new OutputStreamWriter(ramout);
        String maxThedate = null;
        try {
            writeArrfHeader(ramwriter, fields);
            for (int i = 0; i < end; i++) {
                JSONObject item = list.getJSONObject(i);
                String joinl = "";
                StringBuilder row = new StringBuilder();
                for (String f : fields) {
                    Object val = item.get(f);
                    maxThedate = maxThedate(f, maxThedate, val);
                    row.append(joinl).append(val);
                    joinl = ",";
                    if (writeline && lines.containsKey(f)) {
                        long t = fmt.parse(item.getString(DATE_FIELD)).getTime();
                        minmax.min = Math.min(minmax.min, t);
                        minmax.max = Math.max(minmax.max, t);
                        ArrayList<Object> point = new ArrayList<Object>();
                        point.add(t);
                        point.add(val);
                        dataOf(lines.get(f)).add(point);
                    }
                }
                ramwriter.append(row.toString() + "\r\n");
            }
        } finally {
            ramwriter.close();
        }

        // Reader and writer both use the platform default charset, so the
        // in-memory round trip is self-consistent.
        Instances trainingData = new Instances(new InputStreamReader(new ByteArrayInputStream(ramout.toByteArray())));

        WekaForecaster forecaster = new WekaForecaster();
        forecaster.setFieldsToForecast(numberfield.toString());
        forecaster.setBaseForecaster(new GaussianProcesses());
        forecaster.getTSLagMaker().setTimeStampField(DATE_FIELD);
        // Lagged copies of the targets for the previous 1..12 time steps.
        forecaster.getTSLagMaker().setMinLag(1);
        forecaster.getTSLagMaker().setMaxLag(12);
        forecaster.getTSLagMaker().setAddMonthOfYear(true);
        forecaster.getTSLagMaker().setAddQuarterOfYear(true);
        forecaster.buildForecaster(trainingData);
        forecaster.primeForecaster(trainingData);

        List<List<NumericPrediction>> forecast = forecaster.forecast(presize);
        Date startdate = fmt.parse(maxThedate);
        String[] numfieldsarr = numberfield.toString().split(",");
        for (int i = 0; i < presize; i++) {
            List<NumericPrediction> predsAtStep = forecast.get(i);
            // NOTE(review): step i is stamped at last date + i days, so the
            // first forecast point shares the last observed date. Confirm
            // whether + (i + 1) days was intended.
            long t = startdate.getTime() + MILLIS_PER_DAY * i;
            for (int j = 0; j < numfieldsarr.length; j++) {
                NumericPrediction predForTarget = predsAtStep.get(j);
                HashMap<String, Object> line = lines.get(numfieldsarr[j] + FORECAST_KEY_SUFFIX);
                if (line == null) {
                    continue;
                }
                minmax.min = Math.min(minmax.min, t);
                minmax.max = Math.max(minmax.max, t);
                // Exactly [time, value], the same shape as the observed points.
                ArrayList<Object> point = new ArrayList<Object>();
                point.add(t);
                point.add(predForTarget.predicted());
                dataOf(line).add(point);
            }
        }
    }

    /** Serialises a failure response: {@code code} 0 plus the given message. */
    private static String failure(String msg) {
        HashMap<String, Object> rtna = new HashMap<String, Object>();
        rtna.put("code", 0);
        rtna.put("msg", msg);
        return new JSONObject(rtna).toString();
    }

    /**
     * Queries the per-date rows of a project and returns them, together with
     * forecast points, as a JSON string.
     *
     * <p>A {@link HeartBeat} on {@code out} runs in a background thread for the
     * duration of the call and is stopped on every exit path, including
     * exceptions. The query is always issued through {@link MdrillService}
     * with the fixed values {@code "0"}, {@code "100"}, {@code "thedate"} and
     * {@code "asc"}; {@code callback}, {@code startStr}, {@code rowsStr},
     * {@code sort} and {@code order} are accepted but not used.
     *
     * <p>For every prefix of 10 or more rows a one-step forecast is added, then
     * a final forecast of {@code rows / 3 + 1} steps is made from all rows.
     *
     * @param groupby  must be exactly {@code thedate}
     * @param fl       comma-separated field list of the query
     * @param out      JSP writer handed to the heartbeat
     * @param dimvalue comma-separated display names for the numeric fields; may
     *                 be {@code null}
     * @param singleY  {@code "Y"} (any case) to put all lines on one Y axis
     * @return JSON with {@code code} 1 and {@code data}, {@code min},
     *         {@code max} on success, or {@code code} 0 and {@code msg} when
     *         {@code groupby} is wrong, the query fails, or fewer than two rows
     *         are returned
     * @throws Throwable if the query, JSON handling or forecasting fails
     */
    public static String result(String projectName, String callback,
            String startStr, String rowsStr, String queryStr, String dist,
            String fl, String groupby, String sort, String order, String leftjoin, JspWriter out, String dimvalue, String singleY)
            throws Throwable {
        HeartBeat hb = new HeartBeat(out);
        new Thread(hb).start();
        try {
            boolean isSingleY = "Y".equalsIgnoreCase(singleY);

            if (groupby == null || !groupby.equals(DATE_FIELD)) {
                return failure("时间序列分析必须选择日期,且不能有其他维度");
            }

            String rtn = MdrillService.result(projectName, null, "0", "100", queryStr, dist, fl, groupby, "thedate", "asc", leftjoin, null);
            JSONObject jsonObj = new JSONObject(rtn);
            if (!jsonObj.getString("code").equals("1")) {
                return failure("服务器异常,请稍后再试");
            }

            JSONArray list = jsonObj.getJSONObject("data").getJSONArray("docs");
            int len = list.length();
            if (len < 2) {
                return failure("请至少选择2天的数据");
            }
            int presize = (len / 3) + 1;

            String[] fields = getAllFields(list);
            HashMap<String, String> field2dimvalue = fieldToShowName(fl, dimvalue);
            StringBuilder numberfield = new StringBuilder();
            HashMap<String, HashMap<String, Object>> lines = setUpLines(numberfield, fields, isSingleY, field2dimvalue);

            minMaxX minmax = new minMaxX();
            // One-step forecasts from every prefix of at least 10 rows.
            for (int i = 10; i < len; i++) {
                makePre(lines, false, i, 1, minmax, fields, list, numberfield);
            }
            // Observed points plus the forward forecast from all rows.
            makePre(lines, true, len, presize, minmax, fields, list, numberfield);

            HashMap<String, Object> rtna = new HashMap<String, Object>();
            rtna.put("code", 1);
            rtna.put("data", lines);
            rtna.put("min", minmax.min);
            rtna.put("max", minmax.max);
            return new JSONObject(rtna).toString();
        } finally {
            // Stop the heartbeat even when the query or the forecast throws.
            hb.stop();
        }
    }
}