/*
* Copyright (c) 2011-2013 TMate Software Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* For information on how to redistribute this software under
* the terms of a license other than GNU General Public License
* contact TMate Software at support@hg4j.com
*/
package org.tmatesoft.hg.internal;
import static org.tmatesoft.hg.core.Nodeid.NULL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.tmatesoft.hg.core.HgRemoteConnectionException;
import org.tmatesoft.hg.core.Nodeid;
import org.tmatesoft.hg.repo.HgChangelog;
import org.tmatesoft.hg.repo.HgInvalidStateException;
import org.tmatesoft.hg.repo.HgParentChildMap;
import org.tmatesoft.hg.repo.HgRemoteRepository;
import org.tmatesoft.hg.repo.HgRemoteRepository.Range;
import org.tmatesoft.hg.repo.HgRemoteRepository.RemoteBranch;
import org.tmatesoft.hg.util.CancelSupport;
import org.tmatesoft.hg.util.CancelledException;
import org.tmatesoft.hg.util.ProgressSupport;
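/**
* Compares a local repository (represented by a parent-child map of its changelog) with a remote one:
* finds revisions common to both, revisions present only locally, and remote branches missing locally.
*
* @author Artem Tikhomirov
* @author TMate Software Ltd.
*/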
public class RepositoryComparator {
// when true (system property "hg4j.remote.debug"), discovery diagnostics are printed to System.out
private final boolean debug = Boolean.parseBoolean(System.getProperty("hg4j.remote.debug"));
// local changelog revisions; queried for known nodes, heads, children and the complete revision list
private final HgParentChildMap<HgChangelog> localRepo;
// remote peer; queried for heads, branches and 'between' sequences
private final HgRemoteRepository remoteRepo;
// revisions known both locally and remotely; assigned by compare()
private List<Nodeid> common;
// heads reported by the remote repository; assigned by findCommonWithRemote()
private List<Nodeid> remoteHeads;
/**
 * Creates a comparator for the given pair of repositories. No remote calls are made here.
 *
 * @param pwLocal parent-child map of the local changelog
 * @param hgRemote remote repository to compare the local one against
 */
public RepositoryComparator(HgParentChildMap<HgChangelog> pwLocal, HgRemoteRepository hgRemote) {
    this.remoteRepo = hgRemote;
    this.localRepo = pwLocal;
}
/**
 * Discovers revisions shared by the local and the remote repository and records them
 * for subsequent {@link #getCommon()} and {@link #getLocalOnlyRevisions()} calls.
 *
 * @param progressSupport progress sink; started before discovery and always completed, even on failure
 * @param cancelSupport checked once, before any remote call is made
 * @return <code>this</code> for convenience
 * @throws HgRemoteConnectionException if communication with the remote repository fails
 * @throws CancelledException if the operation was cancelled before it started
 * @throws HgInvalidStateException if the remote discovery reported a revision unknown locally as common
 */
public RepositoryComparator compare(ProgressSupport progressSupport, CancelSupport cancelSupport) throws HgRemoteConnectionException, CancelledException {
    cancelSupport.checkCancelled();
    progressSupport.start(10);
    try {
        final List<Nodeid> discovered = findCommonWithRemote();
        // sanity check: validate before publishing, so that a failed comparison
        // never leaves a bogus list behind for getCommon() to return
        for (Nodeid n : discovered) {
            if (!localRepo.knownNode(n)) {
                throw new HgInvalidStateException("Unknown node reported as common:" + n);
            }
        }
        common = Collections.unmodifiableList(discovered);
    } finally {
        // pair every start() with done(), regardless of the outcome
        progressSupport.done();
    }
    return this;
}
/**
 * @return unmodifiable list of revisions known both locally and remotely, as discovered by the comparison
 * @throws HgInvalidStateException if the comparison has not been performed yet
 */
public List<Nodeid> getCommon() {
    if (common == null) {
        // message names the actual signature of the method to call
        throw new HgInvalidStateException("Call #compare(ProgressSupport, CancelSupport) first");
    }
    return common;
}
/**
 * @return heads of the remote repository, as obtained during the comparison
 * @throws HgInvalidStateException if the comparison has not been performed yet
 */
public List<Nodeid> getRemoteHeads() {
    if (remoteHeads == null) {
        // explicit check instead of an assert: assertions are off by default,
        // and a null return would only fail later, far from the actual cause
        throw new HgInvalidStateException("Call #compare(ProgressSupport, CancelSupport) first");
    }
    return remoteHeads;
}
/**
 * Calculates revisions that exist locally but not at the remote side: descendants of the common
 * revisions plus any local line of history that does not trace back to a common revision.
 *
 * @return local-only revisions; all local revisions when there is nothing in common
 */
public List<Nodeid> getLocalOnlyRevisions() {
    final List<Nodeid> shared = getCommon();
    if (shared.isEmpty()) {
        // nothing in common, everything local is missing remotely
        return localRepo.all();
    }
    final RevisionSet sharedSet = new RevisionSet(shared);
    final RevisionSet heads = new RevisionSet(localRepo.heads());
    final List<Nodeid> childrenOfShared = localRepo.childrenOf(shared);
    final RevisionSet childrenSet = new RevisionSet(childrenOfShared);
    // heads that are neither common nor descend from common revisions,
    // e.g. branches that started from an ancestor of a common revision
    final RevisionSet strayHeads = heads.subtract(childrenSet).subtract(sharedSet);
    if (strayHeads.isEmpty()) {
        return childrenOfShared;
    }
    final RevisionSet everything = new RevisionSet(localRepo.all());
    // outgoing := ancestors(missing) - ancestors(common);
    // #ancestors yields parents only, hence the heads themselves are added back
    final RevisionSet strayLineage = everything.ancestors(strayHeads, localRepo).union(strayHeads);
    final RevisionSet sharedLineage = everything.ancestors(sharedSet, localRepo);
    // what is left are revisions that branched off prior to common revisions
    final RevisionSet branchedOffEarlier = strayLineage.subtract(sharedLineage).subtract(sharedSet);
    return branchedOffEarlier.union(childrenSet).asList();
}
/**
 * Queries the remote repository to find revisions known at both sides. As a side effect,
 * records remote heads in {@link #remoteHeads}.
 *
 * @return common revisions in the order of discovery, each listed once
 * @throws HgRemoteConnectionException if communication with the remote repository fails
 */
private List<Nodeid> findCommonWithRemote() throws HgRemoteConnectionException {
    remoteHeads = remoteRepo.heads();
    // sets keep discovery order and guarantee uniqueness, both of the result and of each query batch
    LinkedHashSet<Nodeid> resultCommon = new LinkedHashSet<Nodeid>(); // these remotes are known in local
    LinkedHashSet<Nodeid> toQuery = new LinkedHashSet<Nodeid>(); // these need further queries to find common
    for (Nodeid rh : remoteHeads) {
        if (localRepo.knownNode(rh)) {
            resultCommon.add(rh);
        } else {
            toQuery.add(rh);
        }
    }
    if (toQuery.isEmpty()) {
        return new ArrayList<Nodeid>(resultCommon);
    }
    // branch.root and branch.head are of interest only.
    // These are branches with unknown head but known root, which might not be the last common known,
    // i.e. there might be children changesets that are also available at remote,
    // [..?..common-head..remote-head] - need to scroll up to common head.
    LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>();
    while (!toQuery.isEmpty()) {
        List<RemoteBranch> remoteBranches = remoteRepo.branches(new ArrayList<Nodeid>(toQuery)); //head, root, first parent, second parent
        toQuery.clear();
        for (RemoteBranch rb : remoteBranches) {
            // I assume branches remote call gives branches with head equal to what I pass there, i.e.
            // that I don't need to check whether rb.head is unknown.
            if (localRepo.knownNode(rb.root)) {
                // we know branch start, common head is somewhere in its descendants line
                checkUp2Head.add(rb);
            } else {
                // dig deeper in the history, if necessary
                if (!rb.p1.isNull() && !localRepo.knownNode(rb.p1)) {
                    toQuery.add(rb.p1);
                }
                if (!rb.p2.isNull() && !localRepo.knownNode(rb.p2)) {
                    toQuery.add(rb.p2);
                }
            }
        }
    }
    // can't check nodes between checkUp2Head element and local heads, remote might have distinct descendants sequence
    for (RemoteBranch rb : checkUp2Head) {
        // rb.root is known locally
        List<Nodeid> remoteRevisions = remoteRepo.between(rb.head, rb.root);
        if (remoteRevisions.isEmpty()) {
            // head is immediate child
            resultCommon.add(rb.root);
        } else {
            // between gives result from head to root, I'd like to go in reverse direction
            Collections.reverse(remoteRevisions);
            Nodeid root = rb.root;
            while (!remoteRevisions.isEmpty()) {
                Nodeid n = remoteRevisions.remove(0);
                if (localRepo.knownNode(n)) {
                    if (remoteRevisions.isEmpty()) {
                        // this is the last known node before an unknown
                        resultCommon.add(n);
                        break;
                    }
                    if (remoteRevisions.size() == 1) {
                        // there's only one left between known n and unknown head
                        // this check is to save extra between query, not really essential
                        Nodeid last = remoteRevisions.remove(0);
                        resultCommon.add(localRepo.knownNode(last) ? last : n);
                        break;
                    }
                    // might get handy for next between query, to narrow search down
                    root = n;
                } else {
                    // n is unknown, narrow the search down to [root..n]
                    remoteRevisions = remoteRepo.between(n, root);
                    Collections.reverse(remoteRevisions);
                    if (remoteRevisions.isEmpty()) {
                        // nothing between last known and first unknown, the former is the common one
                        resultCommon.add(root);
                    }
                }
            }
        }
    }
    return new ArrayList<Nodeid>(resultCommon);
}
/**
 * Discovers remote branches that are missing locally. The discovery loop is somewhat similar to
 * the one in {@link #findCommonWithRemote()}, which carries more detailed comments.
 *
 * @return chains of missing branches, one per remote head unknown locally; empty list if there are no incoming changes
 * @throws HgRemoteConnectionException if communication with the remote repository fails
 * @throws HgInvalidStateException if a branch with locally known root can't be narrowed down to a common revision
 */
public List<BranchChain> calculateMissingBranches() throws HgRemoteConnectionException {
    // remote heads unknown locally, these need further queries to find common
    LinkedHashSet<Nodeid> toQuery = new LinkedHashSet<Nodeid>();
    for (Nodeid rh : remoteRepo.heads()) {
        if (!localRepo.knownNode(rh)) {
            toQuery.add(rh);
        }
    }
    if (toQuery.isEmpty()) {
        return Collections.emptyList(); // no incoming changes
    }
    LinkedList<BranchChain> branches2load = new LinkedList<BranchChain>(); // return value
    // branches with unknown head but known root
    LinkedList<RemoteBranch> checkUp2Head = new LinkedList<RemoteBranch>();
    // records relation between branch head and its parent branch, if any
    HashMap<Nodeid, BranchChain> head2chain = new HashMap<Nodeid, BranchChain>();
    while (!toQuery.isEmpty()) {
        List<RemoteBranch> remoteBranches = remoteRepo.branches(new ArrayList<Nodeid>(toQuery)); //head, root, first parent, second parent
        toQuery.clear();
        for (RemoteBranch rb : remoteBranches) {
            BranchChain chainElement = head2chain.get(rb.head);
            if (chainElement == null) {
                chainElement = new BranchChain(rb.head);
                // record this unknown branch to download later
                branches2load.add(chainElement);
                // the only chance we'll need chainElement in the head2chain is when we know this branch's root
                head2chain.put(rb.head, chainElement);
            }
            if (localRepo.knownNode(rb.root)) {
                // we know branch start, common head is somewhere in its descendants line
                checkUp2Head.add(rb);
                continue;
            }
            chainElement.branchRoot = rb.root;
            // dig deeper in the history, if necessary
            final boolean hasP1 = !rb.p1.isNull(), hasP2 = !rb.p2.isNull();
            if (hasP1 && !localRepo.knownNode(rb.p1)) {
                toQuery.add(rb.p1);
                chainElement.p1 = chainFor(head2chain, rb.p1);
            }
            if (hasP2 && !localRepo.knownNode(rb.p2)) {
                toQuery.add(rb.p2);
                chainElement.p2 = chainFor(head2chain, rb.p2);
            }
            if (!hasP1 && !hasP2) {
                // special case, when we do incoming against blank repository, chainElement.branchRoot
                // is first unknown element (revision 0). We need to add another fake BranchChain
                // to fill the promise that terminal BranchChain has branchRoot that is known both locally and remotely
                BranchChain fake = new BranchChain(NULL);
                fake.branchRoot = NULL;
                chainElement.p1 = chainElement.p2 = fake;
            }
        }
    }
    for (RemoteBranch rb : checkUp2Head) {
        Nodeid h = rb.head;
        Nodeid r = rb.root;
        int watchdog = 1000; // upper bound for the number of narrowing queries per branch
        assert head2chain.containsKey(h);
        BranchChain bc = head2chain.get(h);
        assert bc != null : h.toString();
        // if we know branch root locally, there could be no parent branch chain elements.
        assert bc.p1 == null;
        assert bc.p2 == null;
        do {
            List<Nodeid> between = remoteRepo.between(h, r);
            if (between.isEmpty()) {
                bc.branchRoot = r;
                break;
            } else {
                Collections.reverse(between);
                // walk from root towards head: r ends up at the last known node, h at the first unknown one
                for (Nodeid n : between) {
                    if (localRepo.knownNode(n)) {
                        r = n;
                    } else {
                        h = n;
                        break;
                    }
                }
                Nodeid lastInBetween = between.get(between.size() - 1);
                if (r.equals(lastInBetween)) {
                    // every node in between is known
                    bc.branchRoot = r;
                    break;
                } else if (h.equals(lastInBetween)) { // the only chance for current head pointer to point to the sequence tail
                    // is when r is second from the between list end (iow, head,1,[2],4,8...,root)
                    bc.branchRoot = r;
                    break;
                }
            }
        } while (--watchdog > 0);
        if (watchdog == 0) {
            throw new HgInvalidStateException(String.format("Can't narrow down branch [%s, %s]", rb.head.shortNotation(), rb.root.shortNotation()));
        }
    }
    if (debug) {
        System.out.println("calculateMissingBranches:");
        for (BranchChain bc : branches2load) {
            bc.dump();
        }
    }
    return branches2load;
}

// Gives the chain element registered for the head, registering a new one if there's none yet.
// We might have seen the node already and recorded it as a branch chain; reusing the existing
// element gets it completely initialized (a second put with the same key would leave the first
// element uninitialized). Parents of a revision are always the same, hence it's unlikely
// reuse could corrupt the chain (XXX although shall double-check).
private static BranchChain chainFor(Map<Nodeid, BranchChain> head2chain, Nodeid head) {
    BranchChain bc = head2chain.get(head);
    if (bc == null) {
        bc = new BranchChain(head);
        head2chain.put(head, bc);
    }
    return bc;
}
/**
 * Element of a chain of remote branches that are missing locally.
 * <p>
 * Root and head (and everything in between) are unknown locally for each chain element but the last
 * (terminal) one, which has a known root (revision known to be both in the local repository and at the remote server).
 * <p>
 * An alternative would be to keep only unknown elements (so that the promise of calculateMissingBranches
 * would be 100% true), but that seems to complicate the method, while being useful only for the case
 * when we ask incoming for an empty repository (i.e. where branch chain returns all nodes, -1..tip).
 */
public static final class BranchChain {
    // when we construct a chain, we know head which is missing locally, hence init it right away.
    // as for root (branch unknown start), we might happen to have one locally, and need further digging to find out right branch start
    public final Nodeid branchHead;
    public Nodeid branchRoot;
    // either of these can be null, or both.
    // although RemoteBranch has either both parents null, or both non-null, when we construct a chain
    // we might encounter that we locally know one of branch's parent, hence in the chain corresponding field will be blank.
    public BranchChain p1;
    public BranchChain p2;

    /**
     * @param head head of the branch, revision missing locally; shall not be <code>null</code>
     */
    public BranchChain(Nodeid head) {
        assert head != null;
        branchHead = head;
    }

    /**
     * @return <code>true</code> when there's no way to descend further, i.e. both parents are missing
     */
    public boolean isTerminal() {
        return p1 == null && p2 == null; // either can be null, see comment above
    }

    // true when this BranchChain is a branch that spans up to very start of the repository
    // Thus, the only common revision is NULL, recorded in a fake BranchChain object shared between p1 and p2
    /*package-local*/ boolean isRepoStart() {
        if (p1 == null || p1 != p2) {
            // fake element is the very same object in both parent fields
            return false;
        }
        // both head and root of the fake element are the null revision; checked by value
        // rather than by reference, not to depend on identity of Nodeid instances
        final Nodeid sharedRoot = p1.branchRoot;
        return sharedRoot != null && sharedRoot.isNull() && p1.branchHead.isNull();
    }

    @Override
    public String toString() {
        return String.format("BranchChain [root:%s, head:%s]", branchRoot, branchHead);
    }

    // prints this element and, indented, all its parent chain elements to System.out
    void dump() {
        System.out.println(toString());
        internalDump(" ");
    }

    private void internalDump(String prefix) {
        if (p1 == null && p2 == null) {
            // terminal element, nothing to print
            return;
        }
        // at least one parent is present; point out the missing one, if any
        System.out.println(prefix + (p1 != null ? p1.toString() : "NONE?!"));
        System.out.println(prefix + (p2 != null ? p2.toString() : "NONE?!"));
        prefix += " ";
        if (p1 != null) {
            p1.internalDump(prefix);
        }
        if (p2 != null) {
            p2.internalDump(prefix);
        }
    }
}
/**
* Restores the complete sequence of revisions of a remote branch using 'between' queries only.
* Discovered revisions are recorded in an array indexed by their distance from the branch head;
* revisions which are still unknown are queried in batches until no new query can be formed.
*
* @param branchRoot first revision of the branch
* @param branchHead last revision of the branch
* @return list of nodeids from branchRoot to branchHead, inclusive. IOW, first element of the list is always root of the branch
* @throws HgRemoteConnectionException if communication with the remote repository fails
* @throws HgInvalidStateException if position of the root can't be determined or some elements of the branch were not discovered
*/
public List<Nodeid> completeBranch(final Nodeid branchRoot, final Nodeid branchHead) throws HgRemoteConnectionException {
// single 'between' query: revision used as query head, its index in the result array, and the answer, once available
class DataEntry {
public final Nodeid queryHead;
public final int headIndex;
public List<Nodeid> entries;
public DataEntry(Nodeid head, int index, List<Nodeid> data) {
queryHead = head;
headIndex = index;
entries = data;
}
};
List<Nodeid> initial = remoteRepo.between(branchHead, branchRoot);
// result[i] is the revision i steps away from the head; j-th element of a 'between' answer is
// recorded at offset 2^j from the query head (see the loop below), hence the array size
Nodeid[] result = new Nodeid[1 + (1 << initial.size())];
result[0] = branchHead;
int rootIndex = -1; // index in the result, where to place branch's root.
// NOTE(review): presumably an empty answer means root is right next to head, and a single-element
// answer means root is two steps away - confirm against HgRemoteRepository#between
if (initial.isEmpty()) {
rootIndex = 1;
} else if (initial.size() == 1) {
rootIndex = 2;
}
LinkedList<DataEntry> datas = new LinkedList<DataEntry>();
// DataEntry in datas has entries list filled with 'between' data, whereas
// DataEntry in toQuery keeps only nodeid and its index, with entries to be initialized before
// moving to datas.
LinkedList<DataEntry> toQuery = new LinkedList<DataEntry>();
//
datas.add(new DataEntry(branchHead, 0, initial));
int totalQueries = 1; // reported in debug output only
HashSet<Nodeid> queried = new HashSet<Nodeid>();
while(!datas.isEmpty()) {
// keep record of those planned to be queried next time we call between()
// although may keep these in queried, if really don't want separate collection
HashSet<Nodeid> scheduled = new HashSet<Nodeid>();
do {
DataEntry de = datas.removeFirst();
// populate result with discovered elements, at offsets 1, 2, 4... from de.queryHead
for (int i = 1, j = 0; j < de.entries.size(); i = i << 1, j++) {
int idx = de.headIndex + i;
result[idx] = de.entries.get(j);
}
// form next query entries from new unknown elements
if (de.entries.size() > 1) {
/* when entries has only one element, it means de.queryHead was two positions away from the root, and thus
* no new information can be obtained. E.g. when it's 2, it might be case of [0..4] query with
* [1,2] result, and we need one more query to get element 3.
*/
for (int i =1, j = 0; j < de.entries.size(); i = i<<1, j++) {
int idx = de.headIndex + i;
Nodeid x = de.entries.get(j);
// NOTE(review): the guard compares rootIndex with de.headIndex, not with idx of the
// element to be queried - looks like idx might have been intended, confirm
if (!queried.contains(x) && !scheduled.contains(x) && (rootIndex == -1 || rootIndex - de.headIndex > 1)) {
/*queries for elements right before head is senseless, but unless we know head's index, do it anyway*/
toQuery.add(new DataEntry(x, idx, null));
scheduled.add(x);
}
}
}
} while (!datas.isEmpty());
if (!toQuery.isEmpty()) {
totalQueries++;
}
// for each query, create a between request range, keep record Range->DataEntry to know range's start index
LinkedList<HgRemoteRepository.Range> betweenBatch = new LinkedList<HgRemoteRepository.Range>();
HashMap<HgRemoteRepository.Range, DataEntry> rangeToEntry = new HashMap<HgRemoteRepository.Range, DataEntry>();
for (DataEntry de : toQuery) {
queried.add(de.queryHead);
HgRemoteRepository.Range r = new HgRemoteRepository.Range(branchRoot, de.queryHead);
betweenBatch.add(r);
rangeToEntry.put(r, de);
}
if (!betweenBatch.isEmpty()) {
// all scheduled ranges are asked for in a single batch call
Map<Range, List<Nodeid>> between = remoteRepo.between(betweenBatch);
for (Entry<Range, List<Nodeid>> e : between.entrySet()) {
DataEntry de = rangeToEntry.get(e.getKey());
assert de != null;
de.entries = e.getValue();
if (rootIndex == -1 && de.entries.size() == 1) {
// returned sequence of length 1 means we used element from [root-2] as query head
int numberOfElementsExcludingRootAndHead = de.headIndex + 1;
rootIndex = numberOfElementsExcludingRootAndHead + 1;
if (debug) {
System.out.printf("On query %d found out exact number of missing elements: %d\n", totalQueries, numberOfElementsExcludingRootAndHead);
}
}
datas.add(de); // queue up to record result and construct further requests
}
betweenBatch.clear();
rangeToEntry.clear();
}
toQuery.clear();
}
if (rootIndex == -1) {
throw new HgInvalidStateException("Shall not happen, provided between output is correct");
}
result[rootIndex] = branchRoot;
// verify there are no gaps in [0..rootIndex] and reverse the order at the same time
boolean resultOk = true;
LinkedList<Nodeid> fromRootToHead = new LinkedList<Nodeid>();
IntVector missing = new IntVector();
for (int i = 0; i <= rootIndex; i++) {
Nodeid n = result[i];
if (n == null) {
missing.add(i);
resultOk = false;
}
fromRootToHead.addFirst(n); // reverse order
}
if (debug) {
System.out.println("Total queries:" + totalQueries);
}
if (!resultOk) {
assert missing.size() > 0;
// TODO post-1.0 perhaps, there's better alternative than HgInvalidStateException, e.g. HgDataFormatException?
throw new HgInvalidStateException(String.format("Missing elements with indexes: %s", Arrays.toString(missing.toArray())));
}
return fromRootToHead;
}
/**
 * Collects revisions of the given branch chain and, recursively, of its parent chains.
 *
 * @param bc chain to walk, may be <code>null</code>
 * @return revisions in order from branch root to head; modifiable list for a non-empty BranchChain,
 *         empty list when <code>bc</code> is <code>null</code>
 * @throws HgRemoteConnectionException if communication with the remote repository fails
 */
public List<Nodeid> visitBranches(BranchChain bc) throws HgRemoteConnectionException {
    if (bc == null) {
        return Collections.emptyList();
    }
    final List<Nodeid> ownRevisions = completeBranch(bc.branchRoot, bc.branchHead);
    if (bc.isTerminal() || bc.isRepoStart()) {
        // no parent chains to descend into
        return ownRevisions;
    }
    final List<Nodeid> fromFirstParent = visitBranches(bc.p1);
    final List<Nodeid> fromSecondParent = visitBranches(bc.p2);
    final ListIterator<Nodeid> it1 = fromFirstParent.listIterator();
    final ListIterator<Nodeid> it2 = fromSecondParent.listIterator();
    // parents first, own revisions last
    final ArrayList<Nodeid> rv = new ArrayList<Nodeid>(ownRevisions.size() + fromFirstParent.size() + fromSecondParent.size());
    // shared prefix of both parent branches goes in once
    boolean diverged = false;
    while (!diverged && it1.hasNext() && it2.hasNext()) {
        final Nodeid fromP1 = it1.next();
        final Nodeid fromP2 = it2.next();
        if (fromP1.equals(fromP2)) {
            rv.add(fromP1);
        } else {
            // first difference: keep both, tails are appended sequentially below
            rv.add(fromP2);
            rv.add(fromP1);
            diverged = true;
        }
    }
    // remainder of the second parent branch, then of the first one
    while (it2.hasNext()) {
        rv.add(it2.next());
    }
    while (it1.hasNext()) {
        rv.add(it1.next());
    }
    rv.addAll(ownRevisions);
    return rv;
}
/**
 * Walks the chain down to its terminal elements and records their roots.
 * Chains that span up to the very start of the repository contribute nothing.
 *
 * @param bc chain to walk, may be <code>null</code>
 * @param result receives roots of terminal chain elements
 */
public void collectKnownRoots(BranchChain bc, Set<Nodeid> result) {
    if (bc == null) {
        return;
    }
    if (bc.isTerminal()) {
        result.add(bc.branchRoot);
    } else if (!bc.isRepoStart()) {
        // descend into both parents; a missing parent is handled by the null check above
        collectKnownRoots(bc.p1, result);
        collectKnownRoots(bc.p2, result);
    }
}
}