Package org.sonatype.nexus.index

Source Code of org.sonatype.nexus.index.NexusScanningListener

/*
* Sonatype Nexus (TM) Open Source Version
* Copyright (c) 2007-2014 Sonatype, Inc.
* All rights reserved. Includes the third-party code listed at http://links.sonatype.com/products/nexus/oss/attributions.
*
* This program and the accompanying materials are made available under the terms of the Eclipse Public License Version 1.0,
* which accompanies this distribution and is available at http://www.eclipse.org/legal/epl-v10.html.
*
* Sonatype Nexus (TM) Professional Version is available from Sonatype, Inc. "Sonatype" and "Sonatype Nexus" are trademarks
* of Sonatype, Inc. Apache Maven is a trademark of the Apache Software Foundation. M2eclipse is a trademark of the
* Eclipse Foundation. All other trademarks are the property of their respective owners.
*/
package org.sonatype.nexus.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.sonatype.scheduling.TaskUtil;
import org.sonatype.sisu.goodies.common.ComponentSupport;

import org.apache.commons.lang.time.DurationFormatUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.maven.index.ArtifactContext;
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.ArtifactScanningListener;
import org.apache.maven.index.ScanningResult;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.creator.MinimalArtifactInfoIndexCreator;

/**
* Nexus specific ArtifactScanningListener implementation. Looks like the MI's DefaultScannerListener, but has
* subtle but important differences. Most importantly, the "update" parameter is aligned with the meaning of
* "fullReindex", that was before somewhat negation of it, but not fully. Lessen memory consumption by removal
* of fields like uinfos and group related ones. The "deletion" detection is done inversely as in default
* scanner listener: instead gather all the "present" uinfo's into a (potentially huge) set of  strings,
* index is read and processedUinfos is used to check what is present. Redundant optimize call removed also.
*
* @since 2.3
*/
public class NexusScanningListener
    extends ComponentSupport
    implements ArtifactScanningListener
{
  private final IndexingContext context;

  private final IndexSearcher contextIndexSearcher;

  private final boolean fullReindex;

  private final boolean isProxy;

  // the UINFO set used to track processed artifacts (grows during scanning)
  private final Set<String> processedUinfos = new HashSet<String>();

  // exceptions detected and gathered during scanning
  private final List<Exception> exceptions = new ArrayList<Exception>();

  // total count of artifacts discovered
  private int discovered;

  // total count of artifacts added to index
  private int added;

  // total count of artifacts updated on index
  private int updated;

  // timestamp in millis when scanning started
  private long scanningStarted;

  public NexusScanningListener(final IndexingContext context,
                               final IndexSearcher contextIndexSearcher,
                               final boolean fullReindex,
                               final boolean isProxy)
      throws IOException
  {
    this.context = context;
    this.contextIndexSearcher = contextIndexSearcher;
    this.fullReindex = fullReindex;
    this.isProxy = isProxy;
    this.discovered = 0;
    this.added = 0;
    this.updated = 0;
  }

  @Override
  public void scanningStarted(final IndexingContext ctx) {
    log.info("Scanning of repositoryID=\"{}\" started.", context.getRepositoryId());
    scanningStarted = System.currentTimeMillis();
  }

  @Override
  public void artifactDiscovered(final ArtifactContext ac) {
    TaskUtil.checkInterruption();
    final String uinfo = ac.getArtifactInfo().getUinfo();
    if (!processedUinfos.add(uinfo)) {
      return; // skip individual snapshots, this skips like unique timestamped snapshots as indexer uses baseVersion
    }

    try {
      // hosted-full: just blindly add, no need for uniq check, as it happens against empty ctx
      // hosted-nonFull: do update, add when document changed (see update method)
      // proxy-full: do update, as record might be present from downloaded index. Usually is, but Central does not publish ClassNames so update will happen
      // proxy-non-full: do update, as record might be present from downloaded index or already indexed (by some prev scan or by being pulled from remote).

      // act accordingly what we do: hosted/proxy repair/update
      final IndexOp indexOp;
      if (fullReindex && !isProxy) {
        // HOSTED-full only -- in this case, work is done against empty temp ctx so it fine
        // is cheaper, does add, but
        // does not maintain uniqueness
        indexOp = index(ac);
      }
      else {
        // HOSTED-nonFull + PROXY-full/nonFull must go this path. In case of proxy, remote index was pulled, so ctx is not empty
        // is costly, does delete+add
        // maintains uniqueness
        indexOp = update(ac);
      }
      discovered++;
      if (IndexOp.ADDED == indexOp) {
        added++;
      }
      else if (IndexOp.UPDATED == indexOp) {
        updated++;
      }
      for (Exception e : ac.getErrors()) {
        artifactError(ac, e);
      }
    }
    catch (Exception ex) {
      artifactError(ac, ex);
    }
  }

  @Override
  public void scanningFinished(final IndexingContext ctx, final ScanningResult result) {
    TaskUtil.checkInterruption();
    int removed = 0;
    try {
      if (!fullReindex && !isProxy) {
        // HOSTED-nonFull only, perform delete detection too (remove stuff from index that is removed from repository
        removed = removeDeletedArtifacts(result.getRequest().getStartingPath());
      }
      // rebuild groups, as methods moved out from IndexerEngine does not maintain groups anymore
      // as it makes no sense to do it during batch invocation of update method
      context.rebuildGroups();
      context.commit();
    }
    catch (IOException ex) {
      result.addException(ex);
    }

    result.setTotalFiles(discovered);
    result.setDeletedFiles(removed);
    result.getExceptions().addAll(exceptions);

    if (result.getDeletedFiles() > 0 || result.getTotalFiles() > 0) {
      try {
        context.updateTimestamp(true);
        context.optimize();
      }
      catch (Exception ex) {
        result.addException(ex);
      }
    }
    log.info(
        "Scanning of repositoryID=\"{}\" finished: scanned={}, added={}, updated={}, removed={}, scanningDuration={}",
        context.getRepositoryId(), discovered, added, updated, removed,
        DurationFormatUtils.formatDurationHMS(System.currentTimeMillis() - scanningStarted)
    );
  }

  @Override
  public void artifactError(final ArtifactContext ac, final Exception e) {
    Exception exception = e;
    if (ac.getPom() != null || ac.getArtifact() != null) {
      final StringBuilder sb = new StringBuilder("Found a problem while indexing");
      if (ac.getArtifact() != null) {
        sb.append(" artifact '" + ac.getArtifact().getAbsolutePath() + "'");
      }
      if (ac.getPom() != null) {
        sb.append(" pom '" + ac.getPom().getAbsolutePath() + "'");
      }
      exception = new Exception(sb.toString(), e);
    }
    exceptions.add(exception);
  }

  /**
   * Used in {@code update} mode, deletes documents from index that are not found during scanning (means
   * they were deleted from the storage being scanned).
   */
  private int removeDeletedArtifacts(final String contextPath)
      throws IOException
  {
    int deleted = 0;
    final IndexReader r = contextIndexSearcher.getIndexReader();
    for (int i = 0; i < r.maxDoc(); i++) {
      if (!r.isDeleted(i)) {
        final Document d = r.document(i);
        final String uinfo = d.get(ArtifactInfo.UINFO);
        if (uinfo != null && !processedUinfos.contains(uinfo)) {
          // file is not present in storage but is on index, delete it from index
          final String[] ra = ArtifactInfo.FS_PATTERN.split(uinfo);
          final ArtifactInfo ai = new ArtifactInfo();
          ai.repository = context.getRepositoryId();
          ai.groupId = ra[0];
          ai.artifactId = ra[1];
          ai.version = ra[2];
          if (ra.length > 3) {
            ai.classifier = ArtifactInfo.renvl(ra[3]);
          }
          if (ra.length > 4) {
            ai.packaging = ArtifactInfo.renvl(ra[4]);
          }

          // minimal ArtifactContext for removal
          final ArtifactContext ac = new ArtifactContext(null, null, null, ai, ai.calculateGav());
          if (contextPath == null
              || context.getGavCalculator().gavToPath(ac.getGav()).startsWith(contextPath)) {
            if (IndexOp.DELETED == remove(ac)) {
              deleted++;
            }
          }
        }
      }
    }
    return deleted;
  }

  // == copied from
  // https://github.com/apache/maven-indexer/blob/maven-indexer-5.1.0/indexer-core/src/main/java/org/apache/maven/index/DefaultIndexerEngine.java
  // Changes made:
  // * none of the index/update/remove method does more that modifying index, timestamp is not set by either
  // * update does not maintains groups either (per invocation!), it happens once at scan finish

  public enum IndexOp
  {
    NOOP, ADDED, UPDATED, DELETED;
  }

  private IndexOp index(final ArtifactContext ac)
      throws IOException
  {
    if (ac != null && ac.getGav() != null) {
      final Document d = ac.createDocument(context);
      if (d != null) {
        context.getIndexWriter().addDocument(d);
        return IndexOp.ADDED;
      }
    }
    return IndexOp.NOOP;
  }

  private IndexOp update(final ArtifactContext ac)
      throws IOException
  {
    if (ac != null && ac.getGav() != null) {
      final Document d = ac.createDocument(context);
      if (d != null) {
        final Document old = getOldDocument(ac);
        if (old == null) {
          context.getIndexWriter().addDocument(d);
          return IndexOp.ADDED;
        }
        else if (!equals(d, old)) {
          context.getIndexWriter().updateDocument(
              new Term(ArtifactInfo.UINFO, ac.getArtifactInfo().getUinfo()), d);
          return IndexOp.UPDATED;
        }
      }
    }
    return IndexOp.NOOP;
  }

  private IndexOp remove(final ArtifactContext ac)
      throws IOException
  {
    if (ac != null) {
      final String uinfo = ac.getArtifactInfo().getUinfo();
      // add artifact deletion marker
      final Document doc = new Document();
      doc.add(new Field(ArtifactInfo.DELETED, uinfo, Field.Store.YES, Field.Index.NO));
      doc.add(new Field(ArtifactInfo.LAST_MODIFIED, //
          Long.toString(System.currentTimeMillis()), Field.Store.YES, Field.Index.NO));
      IndexWriter w = context.getIndexWriter();
      w.addDocument(doc);
      w.deleteDocuments(new Term(ArtifactInfo.UINFO, uinfo));
      return IndexOp.DELETED;
    }
    return IndexOp.NOOP;
  }

  private boolean equals(final Document d1, final Document d2) {
    // d1 is never null, check caller
    if (d1 == null && d2 == null) {
      return true;
    }
    // d2 is never null, check caller
    if (d1 == null || d2 == null) {
      return false;
    }
    final Map<String, String> m1 = toMap(d1);
    final Map<String, String> m2 = toMap(d2);
    m1.remove(MinimalArtifactInfoIndexCreator.FLD_LAST_MODIFIED.getKey());
    m2.remove(MinimalArtifactInfoIndexCreator.FLD_LAST_MODIFIED.getKey());

    final boolean result = m1.equals(m2);
    if (!result) {
      log.trace("d1={}, d2={}", m1, m2);
    }
    return result;
  }

  private Map<String, String> toMap(final Document d) {
    final HashMap<String, String> result = new HashMap<String, String>();
    for (Object o : d.getFields()) {
      final Fieldable f = (Fieldable) o;
      if (f.isStored()) {
        result.put(f.name(), f.stringValue());
      }
    }
    return result;
  }

  private Document getOldDocument(ArtifactContext ac)
      throws IOException
  {
    final TopDocs result =
        contextIndexSearcher.search(
            new TermQuery(new Term(ArtifactInfo.UINFO, ac.getArtifactInfo().getUinfo())), 2);

    if (result.totalHits == 1) {
      return contextIndexSearcher.doc(result.scoreDocs[0].doc);
    }
    return null;
  }
}
TOP

Related Classes of org.sonatype.nexus.index.NexusScanningListener

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.