Package org.tmatesoft.hg.internal

Source Code of org.tmatesoft.hg.internal.DataEntry

/*
 * Copyright (c) 2011-2013 TMate Software Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * For information on how to redistribute this software under
 * the terms of a license other than GNU General Public License
 * contact TMate Software (the contact address is missing from this copy).
 */
package org.tmatesoft.hg.internal;

import static org.tmatesoft.hg.core.Nodeid.NULL;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.tmatesoft.hg.core.HgRemoteConnectionException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.repo.HgChangelog;
import org.tmatesoft.hg.repo.HgInvalidStateException;
import org.tmatesoft.hg.repo.HgParentChildMap;
import org.tmatesoft.hg.repo.HgRemoteRepository;
import org.tmatesoft.hg.repo.HgRemoteRepository.Range;
import org.tmatesoft.hg.repo.HgRemoteRepository.RemoteBranch;
import org.tmatesoft.hg.util.CancelSupport;
import org.tmatesoft.hg.util.CancelledException;
import org.tmatesoft.hg.util.ProgressSupport;

/**
 * @author Artem Tikhomirov
 * @author TMate Software Ltd.
 */
public class RepositoryComparator {

  // Debug output switch, read from the "hg4j.remote.debug" system property when the instance is created.
  private final boolean debug = Boolean.parseBoolean(System.getProperty("hg4j.remote.debug"));
  // Local side of the comparison; this class queries it with knownNode(), heads(), all() and childrenOf().
  private final HgParentChildMap<HgChangelog> localRepo;
  // Remote side of the comparison; this class queries it with heads(), branches() and between().
  private final HgRemoteRepository remoteRepo;
  // Revisions known both locally and remotely; null until compare() assigns an unmodifiable list.
  private List<Nodeid> common;
  // Heads reported by the remote repository; assigned in findCommonWithRemote().
  private List<Nodeid> remoteHeads;

  public RepositoryComparator(HgParentChildMap<HgChangelog> pwLocal, HgRemoteRepository hgRemote) {
    localRepo = pwLocal;
    remoteRepo = hgRemote;
  /**
   * Finds the revisions shared with the remote repository and stores them, as an unmodifiable list,
   * in {@code common}. As a side effect of {@link #findCommonWithRemote()}, {@code remoteHeads} is
   * assigned as well.
   *
   * NOTE(review): in the lines visible here neither {@code progressSupport} nor {@code cancelSupport}
   * is used and closing braces are absent; the listing looks truncated - confirm against the original source.
   *
   * @param progressSupport progress callback (no use is visible in this listing)
   * @param cancelSupport cancellation callback (no use is visible in this listing)
   * @return this instance
   * @throws HgRemoteConnectionException declared by {@link #findCommonWithRemote()}, which is called here
   * @throws CancelledException declared; no statement that throws it is visible in this listing
   * @throws HgInvalidStateException if a revision reported as common is not known locally
   */
  public RepositoryComparator compare(ProgressSupport progressSupport, CancelSupport cancelSupport) throws HgRemoteConnectionException, CancelledException {
    // the list is handed out by getCommon(), hence wrapped to be read-only
    common = Collections.unmodifiableList(findCommonWithRemote());
    // sanity check: every revision reported as common has to be known locally
    for (Nodeid n : common) {
      if (!localRepo.knownNode(n)) {
        throw new HgInvalidStateException("Unknown node reported as common:" + n);
    // returning the instance allows call chaining
    return this;
  public List<Nodeid> getCommon() {
    if (common == null) {
      throw new HgInvalidStateException("Call #compare(Object) first");
    return common;
  public List<Nodeid> getRemoteHeads() {
    assert remoteHeads != null;
    return remoteHeads;
  /**
   * @return revisions that are children of common entries, i.e. revisions that are present in the local repository but not on the remote server.
   */
  // Computes revisions to send to the remote, based on the outcome of compare().
  // Throws HgInvalidStateException (from getCommon()) when compare() has not been called yet.
  public List<Nodeid> getLocalOnlyRevisions() {
    final List<Nodeid> c = getCommon();
    if (c.isEmpty()) {
      // nothing is shared with the remote, hence every local revision is reported
      return localRepo.all();
    } else {
      final RevisionSet rsCommon = new RevisionSet(c);
      final RevisionSet localHeads = new RevisionSet(localRepo.heads());
      final List<Nodeid> commonChildren = localRepo.childrenOf(c);
      final RevisionSet rsCommonChildren = new RevisionSet(commonChildren);
      // check if there's any revision in the repository that doesn't trace to common
      // e.g. branches from one of common ancestors
      RevisionSet headsNotFromCommon = localHeads.subtract(rsCommonChildren).subtract(rsCommon);
      if (headsNotFromCommon.isEmpty()) {
        // every local head is either common itself or a child of common: children of common is the answer
        return commonChildren;
      RevisionSet all = new RevisionSet(localRepo.all());
      // need outgoing := ancestors(missing) - ancestors(common):
      RevisionSet rsAncestors = all.ancestors(headsNotFromCommon, localRepo);
      // #ancestors gives only parents, we need terminating children as well
      rsAncestors = rsAncestors.union(headsNotFromCommon);
      final RevisionSet rsAncestorsCommon = all.ancestors(rsCommon, localRepo);
      RevisionSet outgoing = rsAncestors.subtract(rsAncestorsCommon).subtract(rsCommon);
      // outgoing keeps children that spun off prior to common revisions
      return outgoing.union(rsCommonChildren).asList();
  /**
   * Asks the remote repository for its heads (recorded in {@code remoteHeads}) and narrows down,
   * with {@code branches} and {@code between} remote queries, which remote revisions are known locally.
   *
   * NOTE(review): several branches below have no statements in this listing (for instance, nothing
   * visibly adds to {@code toQuery} or {@code checkUp2Head}); statements and closing braces appear
   * to be missing - confirm against the original source before relying on the flow described here.
   *
   * @return list of remote revisions found to be known locally; per the TODO at the end, elements
   *         are not guaranteed to be unique
   * @throws HgRemoteConnectionException declared for the remote calls made here
   */
  private List<Nodeid> findCommonWithRemote() throws HgRemoteConnectionException {
    remoteHeads = remoteRepo.heads();
    LinkedList<Nodeid> resultCommon = new LinkedList<Nodeid>(); // these remotes are known in local
    LinkedList<Nodeid> toQuery = new LinkedList<Nodeid>(); // these need further queries to find common
    // split remote heads into those known locally and those that need further queries
    for (Nodeid rh : remoteHeads) {
      if (localRepo.knownNode(rh)) {
      } else {
    if (toQuery.isEmpty()) {
      // nothing left to ask the remote about
      return resultCommon;
    LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>(); // branch.root and branch.head are of interest only.
    // these are branches with unknown head but known root, which might not be the last common known,
    // i.e. there might be children changeset that are also available at remote, [..?..common-head..remote-head] - need to
    // scroll up to common head.
    while (!toQuery.isEmpty()) {
      List<RemoteBranch> remoteBranches = remoteRepo.branches(toQuery)//head, root, first parent, second parent
      while(!remoteBranches.isEmpty()) {
        RemoteBranch rb = remoteBranches.remove(0);
        // I assume branches remote call gives branches with head equal to what I pass there, i.e.
        // that I don't need to check whether rb.head is unknown.
        if (localRepo.knownNode(rb.root)) {
          // we know branch start, common head is somewhere in its descendants line
        } else {
          // dig deeper in the history, if necessary
          if (!rb.p1.isNull() && !localRepo.knownNode(rb.p1)) {
          if (!rb.p2.isNull() && !localRepo.knownNode(rb.p2)) {
    // can't check nodes between checkUp2Head element and local heads, remote might have distinct descendants sequence
    for (RemoteBranch rb : checkUp2Head) {
      // rb.root is known locally
      List<Nodeid> remoteRevisions = remoteRepo.between(rb.head, rb.root);
      if (remoteRevisions.isEmpty()) {
        // head is immediate child
      } else {
        // between gives result from head to root, I'd like to go in reverse direction
        Nodeid root = rb.root;
        while(!remoteRevisions.isEmpty()) {
          Nodeid n = remoteRevisions.remove(0);
          if (localRepo.knownNode(n)) {
            if (remoteRevisions.isEmpty()) {
              // this is the last known node before an unknown
            if (remoteRevisions.size() == 1) {
              // there's only one left between known n and unknown head
              // this check is to save extra between query, not really essential
              Nodeid last = remoteRevisions.remove(0);
              // prefer the remaining element if it is known locally, otherwise fall back to n
              resultCommon.add(localRepo.knownNode(last) ? last : n);
            // might get handy for next between query, to narrow search down
            root = n;
          } else {
            // n is unknown locally: query the remote again for the range between n and the current root
            remoteRevisions = remoteRepo.between(n, root);
            if (remoteRevisions.isEmpty()) {
    // TODO ensure unique elements in the list
    return resultCommon;

  // somewhat similar to Outgoing.findCommonWithRemote()
  /**
   * Builds chains of remote branches that are missing locally, using {@code heads}, {@code branches}
   * and {@code between} queries against the remote repository.
   *
   * NOTE(review): a number of branches below have no statements in this listing (nothing visibly adds
   * to {@code toQuery}, {@code checkUp2Head} or {@code branches2load}, and the {@code do} loop shows no
   * exit other than the watchdog); statements and closing braces appear to be missing - confirm
   * against the original source.
   *
   * @return chains to load; an empty immutable list when there is nothing to query
   * @throws HgRemoteConnectionException declared for the remote calls made here
   * @throws HgInvalidStateException when the watchdog counter, which starts at 1000, reaches zero
   *         while narrowing down a branch
   */
  public List<BranchChain> calculateMissingBranches() throws HgRemoteConnectionException {
    // local variables; they shadow the fields of the same name, the fields are not assigned in this method
    List<Nodeid> remoteHeads = remoteRepo.heads();
    LinkedList<Nodeid> common = new LinkedList<Nodeid>(); // these remotes are known in local
    LinkedHashSet<Nodeid> toQuery = new LinkedHashSet<Nodeid>(); // these need further queries to find common
    for (Nodeid rh : remoteHeads) {
      if (localRepo.knownNode(rh)) {
      } else {
    if (toQuery.isEmpty()) {
      return Collections.emptyList(); // no incoming changes
    LinkedList<BranchChain> branches2load = new LinkedList<BranchChain>(); // return value
    // detailed comments are in Outgoing.findCommonWithRemote
    LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>();
    // records relation between branch head and its parent branch, if any
    HashMap<Nodeid, BranchChain> head2chain = new HashMap<Nodeid, BranchChain>();
    while (!toQuery.isEmpty()) {
      // branches() is given a copy of the set, as a list
      List<RemoteBranch> remoteBranches = remoteRepo.branches(new ArrayList<Nodeid>(toQuery))//head, root, first parent, second parent
      while(!remoteBranches.isEmpty()) {
        RemoteBranch rb = remoteBranches.remove(0);
        BranchChain chainElement = head2chain.get(rb.head);
        if (chainElement == null) {
          chainElement = new BranchChain(rb.head);
          // record this unknown branch to download later
          // the only chance we'll need chainElement in the head2chain is when we know this branch's root
          head2chain.put(rb.head, chainElement);
        if (localRepo.knownNode(rb.root)) {
          // we know branch start, common head is somewhere in its descendants line
        } else {
          chainElement.branchRoot = rb.root;
          // dig deeper in the history, if necessary
          boolean hasP1 = !rb.p1.isNull(), hasP2 = !rb.p2.isNull()
          if (hasP1 && !localRepo.knownNode(rb.p1)) {
            // we might have seen parent node already, and recorded it as a branch chain
            // we shall reuse existing BC to get it completely initialized (head2chain map
            // on second put with the same key would leave first BC uninitialized).
            // It seems there's no reason to be afraid (XXX although shall double-check)
            // that BC's chain would get corrupt (its p1 and p2 fields assigned twice with different values)
            // as parents are always the same (and likely, BC that is common would be the last unknown)
            BranchChain bc = head2chain.get(rb.p1);
            if (bc == null) {
              head2chain.put(rb.p1, bc = new BranchChain(rb.p1));
            chainElement.p1 = bc;
          if (hasP2 && !localRepo.knownNode(rb.p2)) {
            // same reuse-or-create approach as for the first parent
            BranchChain bc = head2chain.get(rb.p2);
            if (bc == null) {
              head2chain.put(rb.p2, bc = new BranchChain(rb.p2));
            chainElement.p2 = bc;
          if (!hasP1 && !hasP2) {
            // special case, when we do incoming against blank repository, chainElement.branchRoot
            // is first unknown element (revision 0). We need to add another fake BranchChain
            // to fill the promise that terminal BranchChain has branchRoot that is known both locally and remotely
            BranchChain fake = new BranchChain(NULL);
            fake.branchRoot = NULL;
            // the very same fake instance for both parents is what BranchChain#isRepoStart() looks for
            chainElement.p1 = chainElement.p2 = fake;
    for (RemoteBranch rb : checkUp2Head) {
      Nodeid h = rb.head;
      Nodeid r = rb.root;
      // upper bound on the number of narrowing iterations for a single branch
      int watchdog = 1000;
      assert head2chain.containsKey(h);
      BranchChain bc = head2chain.get(h);
      assert bc != null : h.toString();
      // if we know branch root locally, there could be no parent branch chain elements.
      assert bc.p1 == null;
      assert bc.p2 == null;
      do {
        List<Nodeid> between = remoteRepo.between(h, r);
        if (between.isEmpty()) {
          bc.branchRoot = r;
        } else {
          // a locally known element becomes the new root candidate, an unknown one the new head candidate
          for (Nodeid n : between) {
            if (localRepo.knownNode(n)) {
              r = n;
            } else {
              h = n;
          Nodeid lastInBetween = between.get(between.size() - 1);
          if (r.equals(lastInBetween)) {
            bc.branchRoot = r;
          } else if (h.equals(lastInBetween)) { // the only chance for current head pointer to point to the sequence tail
            // is when r is second from the between list end (iow, head,1,[2],4,8...,root)
            bc.branchRoot = r;
      } while(--watchdog > 0);
      if (watchdog == 0) {
        throw new HgInvalidStateException(String.format("Can't narrow down branch [%s, %s]", rb.head.shortNotation(), rb.root.shortNotation()));
    if (debug) {
      for (BranchChain bc : branches2load) {
    return branches2load;

  // Root and head (and everything in between) are unknown locally for each chain element but the last (terminal) one,
  // which has a known root (a revision known both locally and at the remote server).
  // An alternative would be to keep only unknown elements (so that the promise of calculateMissingBranches would be 100% true), but that
  // seems to complicate the method, while being useful only for the case when we ask incoming for an empty repository (i.e.
  // where the branch chain returns all nodes, -1..tip).
  public static final class BranchChain {
    // when we construct a chain, we know head which is missing locally, hence init it right away.
    // as for root (branch unknown start), we might happen to have one locally, and need further digging to find out right branch start
    public final Nodeid branchHead;
    public Nodeid branchRoot;
    // either of these can be null, or both.
    // although RemoteBranch has either both parents null, or both non-null, when we construct a chain
    // we might encounter that we locally know one of branch's parent, hence in the chain corresponding field will be blank.
    public BranchChain p1;
    public BranchChain p2;

    // head is expected to be non-null; this is checked with an assert only
    public BranchChain(Nodeid head) {
      assert head != null;
      branchHead = head;
    // true when this element has no parent chain at all
    public boolean isTerminal() {
      return p1 == null && p2 == null; // either can be null, see comment above. Terminal is only when no way to descent
    // true when this BranchChain is a branch that spans up to very start of the repository
    // Thus, the only common revision is NULL, recorded in a fake BranchChain object shared between p1 and p2
    /*package-local*/ boolean isRepoStart() {
      // note, p1 == p2 and branchHead == branchRoot are reference comparisons: both parents have to be the very same
      // object, with head and root set to the same instance, the way calculateMissingBranches builds its fake element
      return p1 == p2 && p1 != null && p1.branchHead == p1.branchRoot && p1.branchHead.isNull();

    public String toString() {
      return String.format("BranchChain [root:%s, head:%s]", branchRoot, branchHead);

    // debug aid: prints parents of this element to System.out, starting with a two-space indent
    void dump() {
      internalDump("  ");

    // Prints p1 and p2 with the given prefix; "NONE?!" stands for a missing parent when the other one is present.
    // NOTE(review): after the prefix is extended, the bodies of the two trailing checks are absent in this
    // listing - confirm against the original source what is done for p1 and p2 there.
    private void internalDump(String prefix) {
      if (p1 != null) {
        System.out.println(prefix + p1.toString());
      } else if (p2 != null) {
        System.out.println(prefix + "NONE?!");
      if (p2 != null) {
        System.out.println(prefix + p2.toString());
      } else if (p1 != null) {
        System.out.println(prefix + "NONE?!");
      prefix += "  ";
      if (p1 != null) {
      if (p2 != null) {

  /**
   * @return list of nodeids from branchRoot to branchHead, inclusive. IOW, the first element of the list is always the root of the branch
   */
  // Reconstructs the sequence of revisions of a remote branch with repeated 'between' queries.
  // Index 0 of the working array holds branchHead; answers of each query are stored at offsets 1, 2, 4, ...
  // from the index of that query's head; branchRoot goes to rootIndex once it is known.
  // Throws HgInvalidStateException when rootIndex can't be determined or when some position
  // in [0..rootIndex] is left unfilled.
  // NOTE(review): in this listing nothing visibly adds to 'scheduled', 'queried', 'betweenBatch' or 'missing',
  // 'totalQueries' is never changed, and closing braces are absent; statements appear to be missing -
  // confirm against the original source.
  public List<Nodeid> completeBranch(final Nodeid branchRoot, final Nodeid branchHead) throws HgRemoteConnectionException {
    // a query (head of the range and its index in the result array) along with the answer to it, once available
    class DataEntry {
      public final Nodeid queryHead;
      public final int headIndex;
      public List<Nodeid> entries;

      public DataEntry(Nodeid head, int index, List<Nodeid> data) {
        queryHead = head;
        headIndex = index;
        entries = data;

    List<Nodeid> initial = remoteRepo.between(branchHead, branchRoot);
    // NOTE(review): (1 << size) is int arithmetic and is not positive for size >= 31;
    // presumably 'between' answers are short enough for this not to matter - confirm
    Nodeid[] result = new Nodeid[1 + (1 << initial.size())];
    result[0] = branchHead;
    int rootIndex = -1; // index in the result, where to place branch's root.
    if (initial.isEmpty()) {
      rootIndex = 1;
    } else if (initial.size() == 1) {
      rootIndex = 2;
    LinkedList<DataEntry> datas = new LinkedList<DataEntry>();
    // DataEntry in datas has entries list filled with 'between' data, whereas
    // DataEntry in toQuery keeps only nodeid and its index, with entries to be initialized before
    // moving to datas.
    LinkedList<DataEntry> toQuery = new LinkedList<DataEntry>();
    datas.add(new DataEntry(branchHead, 0, initial));
    int totalQueries = 1;
    HashSet<Nodeid> queried = new HashSet<Nodeid>();
    while(!datas.isEmpty()) {
      // keep record of those planned to be queried next time we call between()
      // although may keep these in queried, if really don't want separate collection
      HashSet<Nodeid> scheduled = new HashSet<Nodeid>()
      do {
        DataEntry de = datas.removeFirst();
        // populate result with elements discovered for the query that starts at de.queryHead,
        // they go to offsets 1, 2, 4, ... from de.headIndex
        for (int i = 1, j = 0; j < de.entries.size(); i = i << 1, j++) {
          int idx = de.headIndex + i;
          result[idx] = de.entries.get(j);
        // form next query entries from new unknown elements
        if (de.entries.size() > 1) {
          /* when entries has only one element, it means de.queryRoot was at head-2 position, and thus
           * no new information can be obtained. E.g. when it's 2, it might be case of [0..4] query with
           * [1,2] result, and we need one more query to get element 3.  
          for (int i =1, j = 0; j < de.entries.size(); i = i<<1, j++) {
            int idx = de.headIndex + i;
            Nodeid x = de.entries.get(j);
            if (!queried.contains(x) && !scheduled.contains(x) && (rootIndex == -1 || rootIndex - de.headIndex > 1)) {
              /*queries for elements right before head is senseless, but unless we know head's index, do it anyway*/
              toQuery.add(new DataEntry(x, idx, null));
      } while (!datas.isEmpty());
      if (!toQuery.isEmpty()) {
      // for each query, create an between request range, keep record Range->DataEntry to know range's start index
      LinkedList<HgRemoteRepository.Range> betweenBatch = new LinkedList<HgRemoteRepository.Range>();
      HashMap<HgRemoteRepository.Range, DataEntry> rangeToEntry = new HashMap<HgRemoteRepository.Range, DataEntry>();
      for (DataEntry de : toQuery) {
        HgRemoteRepository.Range r = new HgRemoteRepository.Range(branchRoot, de.queryHead);
        rangeToEntry.put(r, de);
      if (!betweenBatch.isEmpty()) {
        // single remote call for the whole batch of ranges
        Map<Range, List<Nodeid>> between = remoteRepo.between(betweenBatch);
        for (Entry<Range, List<Nodeid>> e : between.entrySet()) {
          DataEntry de = rangeToEntry.get(e.getKey());
          assert de != null;
          de.entries = e.getValue();
          if (rootIndex == -1 && de.entries.size() == 1) {
            // returned sequence of length 1 means we used element from [head-2] as root
            int numberOfElementsExcludingRootAndHead = de.headIndex + 1;
            rootIndex = numberOfElementsExcludingRootAndHead + 1;
            if (debug) {
              System.out.printf("On query %d found out exact number of missing elements: %d\n", totalQueries, numberOfElementsExcludingRootAndHead);
          datas.add(de); // queue up to record result and construct further requests
    if (rootIndex == -1) {
      throw new HgInvalidStateException("Shall not happen, provided between output is correct");
    result[rootIndex] = branchRoot;
    boolean resultOk = true;
    LinkedList<Nodeid> fromRootToHead = new LinkedList<Nodeid>();
    IntVector missing = new IntVector();
    // result is ordered head first; walk it up to the root and build the reversed list
    for (int i = 0; i <= rootIndex; i++) {
      Nodeid n = result[i];
      if (n == null) {
        // a position no query has filled
        resultOk = false;
      fromRootToHead.addFirst(n); // reverse order
    if (debug) {
      System.out.println("Total queries:" + totalQueries);
    if (!resultOk) {
      assert missing.size() > 0;
      // TODO post-1.0 perhaps, there's better alternative than HgInvalidStateException, e.g. HgDataFormatException?
      throw new HgInvalidStateException(String.format("Missing elements with indexes: %s", Arrays.toString(missing.toArray())));
    return fromRootToHead;

  /**
   * Returns revisions in order from branch root to head.
   * For a non-empty BranchChain, shall return a modifiable list.
   */
  // Collects revisions of the given chain element and, recursively, of its parent chains.
  // A null chain yields an immutable empty list; a terminal or repo-start element yields
  // the outcome of completeBranch() for its root and head as is.
  // NOTE(review): the merge part below is incomplete in this listing ('Nodeid n1 =;', empty loop
  // bodies, nothing visibly added to 'merged' or 'rv') - confirm against the original source.
  public List<Nodeid> visitBranches(BranchChain bc) throws HgRemoteConnectionException {
    if (bc == null) {
      return Collections.emptyList();
    List<Nodeid> mine = completeBranch(bc.branchRoot, bc.branchHead);
    if (bc.isTerminal() || bc.isRepoStart()) {
      // no parent chains to descend into
      return mine;
    List<Nodeid> parentBranch1 = visitBranches(bc.p1);
    List<Nodeid> parentBranch2 = visitBranches(bc.p2);
    // merge
    LinkedList<Nodeid> merged = new LinkedList<Nodeid>();
    ListIterator<Nodeid> i1 = parentBranch1.listIterator(), i2 = parentBranch2.listIterator();
    while (i1.hasNext() && i2.hasNext()) {
      Nodeid n1 =;
      Nodeid n2 =;
      if (n1.equals(n2)) {
      } else {
        // first different => add both, and continue adding both tails sequentially
    // copy rest of second parent branch
    while (i2.hasNext()) {
    // copy rest of first parent branch
    while (i1.hasNext()) {
    // sized to hold both this branch's revisions and the merged revisions of its parents
    ArrayList<Nodeid> rv = new ArrayList<Nodeid>(mine.size() + merged.size());
    return rv;

  /**
   * Walks the given chain and both of its parent chains recursively, with {@code result} passed
   * along as the accumulator.
   *
   * NOTE(review): bodies of the three checks below ({@code null}, terminal, repo start) are absent
   * in this listing, so what gets added to {@code result} and where the recursion stops is not
   * visible here - confirm against the original source.
   *
   * @param bc chain element to start from; a {@code null} check is the first thing done
   * @param result set handed over unchanged to the recursive calls
   */
  public void collectKnownRoots(BranchChain bc, Set<Nodeid> result) {
    if (bc == null) {
    if (bc.isTerminal()) {
    if (bc.isRepoStart()) {
    collectKnownRoots(bc.p1, result);
    collectKnownRoots(bc.p2, result);

Related Classes of org.tmatesoft.hg.internal.DataEntry

Copyright © 2018 All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact