Package org.apache.uima.ducc.jd.client

Source Code of org.apache.uima.ducc.jd.client.CasSource

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ducc.jd.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.ducc.common.uima.UimaUtils;
import org.apache.uima.ducc.common.utils.DuccLogger;
import org.apache.uima.ducc.common.utils.DuccLoggerComponents;
import org.apache.uima.ducc.common.utils.QuotedOptions;
import org.apache.uima.ducc.jd.IJobDriver;
import org.apache.uima.ducc.transport.event.common.IDuccWorkJob;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceCreationSpecifier;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.resource.metadata.ConfigurationParameter;
import org.apache.uima.resource.metadata.ConfigurationParameterDeclarations;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypePriorities;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLParser;


public class CasSource {

  private static DuccLogger duccOut = DuccLoggerComponents.getJdOut(CasSource.class.getName());
 
  private ConcurrentLinkedQueue<CAS> recycledCasList = new ConcurrentLinkedQueue<CAS>();
  private AtomicBoolean exhaustedReader = new AtomicBoolean(false);
  private AtomicInteger seqNo = new AtomicInteger(0);
 
  private CasLimbo casLimbo;
 
  private CasDispatchMap casDispatchMap;
 
  private CollectionReader cr;
  private TypePriorities tp;
  private TypeSystemDescription tsd;
  private FsIndexDescription[] fsid;
  private Properties properties;
 
  private int total = -1;
 
  public CasSource(IJobDriver jd, String crxml, String crcfg, CasDispatchMap casDispatchMap)
    throws IOException, InvalidXMLException, ResourceInitializationException, ResourceConfigurationException {
    init(jd, crxml, crcfg, casDispatchMap);
  }
 
    private void init(IJobDriver jd, String crxml, String crcfg, CasDispatchMap casDispatchMap) throws IOException, ResourceInitializationException, InvalidXMLException, ResourceConfigurationException {
        String location = "init";
        this.casDispatchMap = casDispatchMap;
        casLimbo = new CasLimbo(jd);
        IDuccWorkJob job = jd.getJob();
        // CR descriptor
        String crDescriptor = crxml;
        //
        // Read and parse the collection reader
        //
        XMLParser xmlParser = UIMAFramework.getXMLParser();
        XMLInputSource in = null;
    try {
      in = UimaUtils.getXMLInputSource(crDescriptor);
    } catch (InvalidXMLException e) {
            duccOut.error(location, job.getDuccId(), e);
            throw e;
    }
        ResourceSpecifier crrs = xmlParser.parseCollectionReaderDescription(in);

        duccOut.debug(location, job.getDuccId(), crcfg);
        // CR overrides
        ResourceCreationSpecifier specifier = (ResourceCreationSpecifier) crrs;
        ConfigurationParameterDeclarations configurationParameterDeclarations = specifier.getMetaData().getConfigurationParameterDeclarations();
        ConfigurationParameterSettings cps = specifier.getMetaData().getConfigurationParameterSettings();
        if(crcfg!= null) {
            // Tokenize override assignments on whitespace, honoring but stripping quotes
            // Then create a map from all of them
            ArrayList<String> toks = QuotedOptions.tokenizeList(crcfg, true);
            Map<String,String> map = QuotedOptions.parseAssignments(toks, false);
            for (Entry<String, String> ent : map.entrySet()) {
                String name = ent.getKey();
                String value = ent.getValue();
                duccOut.debug(location, job.getDuccId(), "config param name:"+name+" "+"value:"+value);
                ConfigurationParameter configurationParameter = UimaUtils.findConfigurationParameter(configurationParameterDeclarations, name);
                if (configurationParameter == null) {
                    throw new ResourceConfigurationException(
                                    ResourceConfigurationException.NONEXISTENT_PARAMETER, new Object[] { name, "CollectionReader" });
                }
                Object object = UimaUtils.getOverrideValueObject(configurationParameter, value);
                cps.setParameterValue(name, object);
            }
        }
        // CR
        duccOut.debug(location, job.getDuccId(), "CR creation...");
        cr = UIMAFramework.produceCollectionReader(crrs);
        Properties props = new Properties();
        //  Change the heap size to reduce CAS size. The size here depends on what we intend to
        //  put into the CAS. If its just a pointer to data (like question id), the size of the
        //  heap can be reduced even more
        props.setProperty(UIMAFramework.CAS_INITIAL_HEAP_SIZE, "1000");
        //  save CR type system. It will be used to initialize CASes later, in getEmptyCas()
        init(cr.getProcessingResourceMetaData().getTypeSystem(),
             cr.getProcessingResourceMetaData().getTypePriorities(),
             cr.getProcessingResourceMetaData().getFsIndexes(),
             props);
        duccOut.debug(location, job.getDuccId(), "CR created.");
    }
 
    public void init(TypeSystemDescription tsd,TypePriorities tp,FsIndexDescription[] fsid,Properties properties) {
      this.tsd = tsd;
      this.tp = tp;
      this.fsid = fsid;
      this.properties = properties;
      initTotal();
    }
 
  private CAS getEmptyCas(int seqNo) throws ResourceInitializationException {
    String location = "getEmptyCas";
    CAS cas = getRecycledCas();
    String type = "reuse";
    while(cas == null) {
      //  Use class level locking to serialize access to CasCreationUtils
      //  Only one thread at the time can create a CAS. UIMA uses lazy
      //  initialization approach which can cause NPE when two threads
      //  attempt to initialize a CAS.
      synchronized( CasCreationUtils.class) {
        cas = CasCreationUtils.createCas(tsd, tp, fsid, properties);
      }
      type = "new";
      if(!casDispatchMap.reserveKey(cas)) {
        type = "duplicate";
        duccOut.debug(location, null, "type:"+type+" "+"seqNo:"+seqNo+" "+"casId:"+cas.hashCode());
        cas = null;
      }
    }
    duccOut.debug(location, null, "type:"+type+" "+"seqNo:"+seqNo+" "+"casId:"+cas.hashCode());
    return cas;
  }
 
  private CAS getRecycledCas() {
    CAS cas = null;
    if(!recycledCasList.isEmpty()) {
      cas = recycledCasList.poll();
    }
    return cas;
  }
 
 
  private void putRecycledCas(CAS cas) {
    assert(cas != null);
    recycledCasList.add(cas);
  }

  boolean recycleDisabled = false;
 
  public void recycle(CAS cas) {
    assert(cas != null);
    if(recycleDisabled) {
      return;
    }
    cas.reset();
    putRecycledCas(cas);
  }
 
  public void push(CasTuple casTuple) {
    String location = "push";
    assert(casTuple != null);
    casLimbo.put(casTuple);
    duccOut.debug(location, null, "seqNo:"+casTuple.getSeqno()+" "+"casId:"+casTuple.getCas().hashCode()+" "+"size:"+casLimbo.size());
  }
 
  public boolean isExhaustedReader() {
    String location = "isExhaustedReader";
    boolean retVal = exhaustedReader.get();
    duccOut.debug(location, null, retVal);
    return retVal;
  }
 
  public int getLimboSize() {
    return casLimbo.size();
  }
 
  public boolean isLimboEmpty() {
    return casLimbo.isEmpty();
  }
 
  public boolean hasLimboAvailable() {
    return casLimbo.hasAvailable();
  }
 
  public boolean isEmpty() {
    boolean retVal = false;
    if(isExhaustedReader()) {
      if(isLimboEmpty()) {
        retVal = true;
      }
    }
    return retVal;
  }
 
  public CasTuple pop() throws Exception {
    String location = "pop";
    CasTuple casTuple = casLimbo.get();
    if(casTuple == null) {
      try {
        synchronized(cr) {
          if((total > 0) && (total == seqNo.get())) {
            exhaustedReader.set(true);
          }
          else if(cr.hasNext()) {
            int next = seqNo.addAndGet(1);
            CAS cas = recycledCasList.poll();
            if(cas == null) {
              cas = getEmptyCas(next);
            }
              cr.getNext(cas);
              casTuple = new CasTuple(cas,next);
          }
          else {
            exhaustedReader.set(true);
          }
        }
      }
      catch (CollectionException e) {
        duccOut.error(location, null, e);
        throw e;
      }
      catch (IOException e) {
        duccOut.error(location, null, e);
        throw e;
      }
      catch (ResourceInitializationException e) {
        duccOut.error(location, null, e);
        throw e;
      }
    }
    if(casTuple != null) {
      duccOut.debug(location, null, "seqNo:"+casTuple.getSeqno()+" "+"casId:"+casTuple.getCas().hashCode()+" "+"size:"+casLimbo.size());
    }
    return casTuple;
  }
 
  public Progress[] getProgressArray() {
    synchronized(cr) {
      return cr.getProgress();
    }
  }
 
  public Progress getProgress() {
    Progress progress = null;
    Progress[] progressArray = getProgressArray();
    if(progressArray != null) {
      progress = progressArray[0];
    }
    return progress;
  }
 
  private void initTotal() {
    Progress progress = getProgress();
    if(progress != null) {
      total = (int)progress.getTotal();
    }
  }
 
  public int getTotal() {
    return total;
  }
 
  public int getSeqNo() {
    return seqNo.get();
  }
 
  public void rectifyStatus() {
    if(casLimbo != null) {
      casLimbo.rectifyStatus();
    }
  }
}
TOP

Related Classes of org.apache.uima.ducc.jd.client.CasSource

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.