# # patch "ChangeLog" # from [50d13e5d3712436ca27eaaa524f18d2e5de19cbd] # to [0c93e56cbfe4d58ed57bd79e231715d2fb639582] # # patch "commands.cc" # from [688c544d00c8a14a7ad7764d0905f85a2f0ddfac] # to [e70ede39de672a6bc765a280a1f761010b5eee2b] # --- ChangeLog +++ ChangeLog @@ -1,3 +1,10 @@ +2005-06-xx Timothy Brownawell + + * commands.cc: Beginnings of precise-cdv merge, as "pcdv" command. + (Translation of Python reference implementation posted to Revctrl.) + Will be moved to a more appropriate home once it works (currenty + has insane memory usage). + 2005-06-05 Timothy Brownawell * netsync.cc, netcmd.cc: Style cleanups (mostly whitespace). --- commands.cc +++ commands.cc @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include #include @@ -3785,4 +3787,634 @@ app.db.clear_var(k); } + + +// find lines that occur exactly once in each of a and b +void +unique_lcs(vector const & a, + vector const & b, + vector > & res) +{ + // index[line in a] = position of line + // if line is a duplicate, index[line] = -1 + map index; + for (int i = 0; (unsigned int)(i) < a.size(); ++i) + { + map::iterator j = index.find(idx(a,i)); + if (j != index.end()) + j->second=-1; + else + index.insert(make_pair(idx(a,i), i)); + } + // btoa[i] = a.find(b[i]), if b[i] is unique in both + // otherwise, btoa[i] = -1 + map index2; + vector btoa(b.size()); + for (int i = 0; (unsigned int)(i) < b.size(); ++i) + { + map::iterator j = index.find(idx(b,i)); + if (j != index.end()) + { + map::iterator k = index2.find(idx(b,i)); + if (k != index2.end()) + { + btoa[k->second] = -1; + index.erase(j); + } + else + { + index2.insert(make_pair(idx(b,i), i)); + btoa[i] = j->second; + } + } + } + // Patience sorting + // http://en.wikipedia.org/wiki/Patience_sorting + vector backpointers(b.size(), -1); + vector stacks; + vector lasts; + int k = 0; + for (int bpos = 0; (unsigned int)(bpos) < btoa.size(); ++bpos) + { + int apos = idx(btoa, bpos); + if (apos == -1) + continue; + // optimize: check if next line comes at the end + if (stacks.size() && stacks.back() < apos) + k = stacks.size(); + // optimize: check if next line comes after prev line + else if (stacks.size() + && stacks[k] < apos + && ((unsigned int)(k) == stacks.size()-1 + || idx(stacks,k+1) > apos)) + ++k; + else + { +// k = bisect(stacks, apos); + for (int x = 0; (unsigned int)(x) < stacks.size(); ++x) + if (idx(stacks,x) > apos) + { + k = x; + break; + } + } + if (k > 0) + idx(backpointers, bpos) = idx(lasts, k-1); + if ((unsigned int)(k) < stacks.size()) + { + idx(stacks, k) = apos; + idx(lasts, k) = bpos; + } + else + { + stacks.push_back(apos); + lasts.push_back(bpos); + } + } + res.clear(); + if (lasts.empty()) + return; + k = lasts.back(); + while (k != -1) + { + res.push_back(make_pair(idx(btoa, k), k)); + k = idx(backpointers, k); + } + reverse(res.begin(), res.end()); + return; +} + +void +recurse_matches(vector const & a, + vector const & b, + int ahi, + int bhi, + vector > & answer, + int maxrecursion) +{ + if (maxrecursion < 0) + return; + unsigned int oldlength = answer.size(); + int alo = 0, blo = 0; + if (oldlength != 0) + { + alo = answer.back().first + 1; + blo = answer.back().second + 1; + } + // extend line matches into section matches + vector > linematches; + vector a2, b2; + for(int i = alo; i < ahi; ++i) + a2.push_back(idx(a, i)); + for(int i = blo; i < bhi; ++i) + b2.push_back(idx(b, i)); + unique_lcs(a2, b2, linematches); + for (vector >::iterator i = linematches.begin(); + i != linematches.end(); ++i) + { + int apos = i->first + alo; + int bpos = i->second + blo; + int lasta = -1, lastb = -1; + if (answer.size()) + { + lasta = answer.back().first; + lastb = answer.back().second; + } + // don't overlap with an existing match + if (apos <= lasta || bpos <= lastb) + continue; + // extend as far back as possible + while (apos > lasta + 1 && bpos > lastb + 1) + { + int newapos = apos - 1; + while (newapos > lasta && idx(a, newapos).empty()) + --newapos; + if (newapos == lasta || idx(a, newapos) != idx(b, bpos-1)) + break; + apos = newapos; + --bpos; + } + recurse_matches(a, b, apos, bpos, answer, maxrecursion-1); + answer.push_back(make_pair(apos, bpos)); + // extend as far forward as possible + while (apos < ahi - 1 && bpos < bhi - 1) + { + int newapos = apos + 1; + while (newapos < ahi - 1 && idx(a, newapos).empty()) + ++newapos; + if (newapos == ahi || idx(a, newapos) != idx(b, bpos + 1)) + break; + apos = newapos; + ++bpos; + answer.push_back(make_pair(apos, bpos)); + } + } + if (answer.size() > oldlength) + // find matches between the laswt match and the end + recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1); +} + +struct living_status +{ + map > overrides; + + living_status() + { + overrides.insert(make_pair("root", vector())); + } + + living_status(map > const & _overrides): + overrides(_overrides) + {} + + living_status + merge(living_status const & other) const + { + map > newdict; + for (map >::const_iterator i = overrides.begin(); + i != overrides.end(); ++i) + { + newdict.insert(*i); + map >::const_iterator j = + other.overrides.find(i->first); + I(j == other.overrides.end() || j->second == i->second); + } + for (map >::const_iterator i + = other.overrides.begin(); i != other.overrides.end(); ++i) + { + map >::const_iterator j + = overrides.find(i->first); + I(j == overrides.end() || j->second == i->second); + } + return living_status(newdict); + } + + bool + is_living() const + { + set oldworking, newworking, ref; + for (map >::const_iterator i = overrides.begin(); + i != overrides.end(); ++i) + ref.insert(i->first); + newworking = ref; + while (oldworking != newworking) + { + oldworking = newworking; + newworking = ref; + for (set::const_iterator k = oldworking.begin(); + k != oldworking.end(); ++k) + { + map >::const_iterator x + = overrides.find(*k); + for (vector::const_iterator j = x->second.begin(); + j != x->second.end(); ++j) + newworking.erase(*j); + } + } + return newworking.find("root") == newworking.end(); + } + + bool + _makes_living(string key) const + { + bool result = false; + while (key != "root") + { + result = !result; + map >::const_iterator i; + i = overrides.find(key); + if (i == overrides.end() || i->second.empty()) + break; + key = idx(i->second, 0); + } + return result; + } + + living_status + set_living(string rev, bool new_status) const + { + if (new_status == is_living()) + return *this; + set alive; + for (map >::const_iterator i = overrides.begin(); + i != overrides.end(); ++i) + alive.insert(i->first); + for (map >::const_iterator i = overrides.begin(); + i != overrides.end(); ++i) + { + for (vector::const_iterator j = i->second.begin(); + j != i->second.end(); ++j) + alive.erase(*j); + } + map > newdict(overrides); + map >::iterator res = + newdict.insert(make_pair(rev, vector())).first; + for (set::iterator i = alive.begin(); + i != alive.end(); ++i) + { + if (_makes_living(*i) != new_status) + res->second.push_back(*i); + } + return living_status(newdict); + } +}; + +struct merge_section +{ + bool split; + vector left; + vector right; + + merge_section(string const & s): + split(false) + { + left.push_back(s); + } + + merge_section(vector const & c): + split(false), left(c) {} + + merge_section(vector const & l, vector const & r): + split(true), left(l), right(r) {} +}; + +//a.mash(b).resolve(c) -> "a and b were merged, with result c" +//a.mash(b).conflict() -> "merge a and b" +//a.resolve(b) -> "b is a child of a" +struct file_state +{ + boost::shared_ptr > > > weave; + map, living_status> states; + + file_state(boost::shared_ptr > > > _weave): + weave(_weave) + {} + + file_state(vector const & initial, string const & rev) + { + weave.reset(new vector > >); + for (int i = 0; (unsigned int)(i) < initial.size(); ++i) + weave->push_back(make_pair(idx(initial, i), make_pair(rev, i))); + for (int i = 0; (unsigned int)(i) < initial.size(); ++i) + { + states[make_pair(rev, i)] = living_status().set_living(rev, true); + } + } + + // combine line states between two versions of a file + file_state + mash(file_state const & other) const + { + I(weave == other.weave); + file_state newstate(weave); + for (map, living_status>::const_iterator i + = states.begin(); i != states.end(); ++i) + { + map, living_status>::const_iterator j + = other.states.find(i->first); + if (j != other.states.end()) + newstate.states[i->first] = i->second.merge(j->second); + else + newstate.states[i->first] = i->second.merge(living_status()); + } + return newstate; + } + + // get the list of live lines in this version of the file + vector + current() + { + vector res; + for (vector > >::iterator i + = weave->begin(); i != weave->end(); ++i) + { + if (states[i->second].is_living()) + res.push_back(i->first); + } + return res; + } + + // merge; return a list of sections which either automerge or conflict + vector + conflict(file_state const & other) + { + I(weave == other.weave); + vector result; + vector left, right, clean; + bool mustleft = false, mustright = false; + for (vector > >::const_iterator i + = weave->begin(); i != weave->end(); ++i) + { + map, living_status>::const_iterator m + = states.find(i->second); + map, living_status>::const_iterator o + = other.states.find(i->second); + bool mm(m != states.end()); + bool oo(o != other.states.end()); + living_status const & meline(mm?m->second:living_status()); + living_status const & otherline(oo?o->second:living_status()); + bool mehave = meline.is_living(); + bool otherhave = otherline.is_living(); + bool mergehave = meline.merge(otherline).is_living(); + if (mehave && otherhave && mergehave) + { + if (mustright && mustleft) + result.push_back(merge_section(left, right)); + else + result.push_back(merge_section(clean)); + result.push_back(merge_section(i->first)); + left.clear(); + right.clear(); + clean.clear(); + mustleft = false; + mustright = false; + } + else + { + if (mehave != otherhave) + { + if (mehave == mergehave) + mustleft = true; + else + mustright = true; + } + if (mehave) + left.push_back(i->first); + if (otherhave) + right.push_back(i->first); + if (mergehave) + clean.push_back(i->first); + } + } + if (mustright && mustleft) + result.push_back(merge_section(left, right)); + else + result.push_back(merge_section(clean)); + return result; + } + + // add a descendent of this version to the weave, and return it + file_state + resolve(vector const & result, string revision) + { + if (weave->empty()) + { + file_state x(result, revision); + *weave = *x.weave; + x.weave = weave; + return x; + } + vector lines; + lines.reserve(weave->size()); + for (vector > >::iterator i + = weave->begin(); i != weave->end(); ++i) + { + if (states[i->second].is_living()) + lines.push_back(i->first); + else + lines.push_back(string()); + } + vector > matches; + recurse_matches(lines, result, + lines.size(), result.size(), + matches, 10); + lines.clear(); + for (vector > >::iterator i + = weave->begin(); i != weave->end(); ++i) + lines.push_back(i->first); + vector > matches2; + matches.push_back(make_pair(lines.size(), result.size())); + for (vector >::iterator i = matches.begin(); + i != matches.end(); ++i) + { + recurse_matches(lines, result, i->first, i->second, matches2, 10); + if ((unsigned int)(i->first) != lines.size()) + matches2.push_back(make_pair(i->first, i->second)); + } + matches.pop_back(); + set > living; + for (vector >::iterator i = matches2.begin(); + i != matches2.end(); ++i) + { + living.insert(idx(*weave, i->first).second); + } + vector > > > toinsert; + int lasta = -1, lastb = -1; + matches2.push_back(make_pair(weave->size(), result.size())); + for (vector >::iterator i = matches2.begin(); + i != matches2.end(); ++i) + { + for (int x = lastb + 1; x < i->second; ++x) + { + pair newline(revision, x); + living.insert(newline); + toinsert.push_back(make_pair(lasta + 1, + make_pair(idx(result,x), + newline))); + } + lasta = i->first; + lastb = i->second; + } + reverse(toinsert.begin(), toinsert.end()); + for (vector > > >::iterator i + = toinsert.begin(); i != toinsert.end(); ++i) + weave->insert(weave->begin() + i->first, i->second); + file_state out(weave); + for (vector > >::iterator i + = weave->begin(); i != weave->end(); ++i) + { + out.states[i->second] = + states[i->second].set_living(revision, + living.find(i->second) != living.end()); + } + return out; + } +}; + +vector +get_file(revision_id const & rev, string const & filename, app_state & app) +{ + if (null_id(rev)) + return vector(); + manifest_id mid; + app.db.get_revision_manifest(rev, mid); + file_path fp(filename); + manifest_map m; + app.db.get_manifest(mid, m); + manifest_map::const_iterator i = m.find(fp); + if(i == m.end()) + return vector(); + file_id ident = manifest_entry_id(i); + file_data dat; + L(F("dumping file %s\n") % ident); + app.db.get_file_version(ident, dat); + vector lines; + split_into_lines(dat.inner()(), "utf8", lines); + for (vector::iterator i = lines.begin(); + i != lines.end(); ++i) + (*i)+='\n'; + return lines; +} + +CMD(pcdv, "debug", "REVISION REVISION FILENAME", + "precise-cdv merge FILENAME in the two given revisions", + OPT_NONE) +{ + if (args.size() != 3) + throw usage(name); + + typedef std::multimap::iterator gi; + typedef std::map > >::iterator pi; + std::multimap graph; + app.db.get_revision_ancestry(graph); + std::set leaves; + app.db.get_revision_ids(leaves); + std::map > > parents; + for (gi i = graph.begin(); i != graph.end(); ++i) + parents.insert(std::make_pair(i->first, + std::make_pair(0, vector()))); + for (gi i = graph.begin(); i != graph.end(); ++i) + { + std::pair > & p(parents[i->second]); + ++p.first; + p.second.push_back(i->first); + } + // first find the set of graph roots + std::list roots; + for (pi i = parents.begin(); i != parents.end(); ++i) + if(i->second.first == 0) + roots.push_back(i->first); + map files; + file_state empty = file_state(vector(), string()); + std::set heads; + while (!roots.empty()) + { + file_state p(empty); + vector const & ps(parents[roots.front()].second); + if (ps.size() == 0) + p = empty; + else if (ps.size() == 1) + { + map::iterator i = files.find(ps.front()); + I(i != files.end()); + p = i->second; + } + else + { + I(ps.size() == 2); + map::iterator i = files.find(ps.front()); + I(i != files.end()); + map::iterator j = files.find(ps.back()); + I(j != files.end()); + p = i->second.mash(j->second); + } + vector contents(get_file(roots.front(), idx(args, 2)(), app)); + files.insert(std::make_pair(roots.front(),p.resolve(contents, roots.front().inner()()))); + heads.insert(roots.front()); + for (vector::const_iterator i = ps.begin(); + i != ps.end(); ++i) + heads.erase(*i); + for(gi i = graph.lower_bound(roots.front()); + i != graph.upper_bound(roots.front()); i++) + if(--(parents[i->second].first) == 0) + roots.push_back(i->second); + graph.erase(roots.front()); + leaves.erase(roots.front()); + roots.pop_front(); + } + revision_id left, right; + complete(app, idx(args, 0)(), left); + complete(app, idx(args, 1)(), right); + map::iterator l = files.find(left); + N(l != files.end(), F("Not found.")); + map::iterator r = files.find(right); + N(r != files.end(), F("Not found.")); + vector result(l->second.conflict(r->second)); + bool lastok=false; + for (vector::iterator i = result.begin(); + i != result.end(); ++i) + { + if (i->split) + { + if (i->left.size()) + { + cout<<"<<<<<<<<<<"<<'\n'; + for (vector::iterator j = i->left.begin(); + j != i->left.end(); ++j) + cout<<*j; + } + if (i->right.size()) + { + cout<<">>>>>>>>>"<<'\n'; + for (vector::iterator j = i->right.begin(); + j != i->right.end(); ++j) + cout<<*j; + } + lastok = false; + } + else + { + if (i->left.size()) + { + if (!lastok) + cout<<"=========="<<'\n'; + for (vector::iterator j = i->left.begin(); + j != i->left.end(); ++j) + cout<<*j; + } + lastok = true; + } + } +} + + + + + + + + + + + }; // namespace commands