# # # patch "revision.cc" # from [c70161ded1af39115360f97be69884f847e43940] # to [30ef8973113c12ec0f7a53f1112bd6252b1beb7d] # # patch "work.cc" # from [b9dfdced17ced6f9849d0a7353dd9af4556c8d74] # to [7c3492b6c9f41728f0f70bf75f77e447ef21c37c] # ============================================================ --- revision.cc c70161ded1af39115360f97be69884f847e43940 +++ revision.cc 30ef8973113c12ec0f7a53f1112bd6252b1beb7d @@ -233,27 +233,27 @@ // FIXME: this algorithm is incredibly inefficient; it's O(n) where n is the // size of the entire revision graph. -static bool -is_ancestor(revision_id const & ancestor_id, - revision_id const & descendent_id, - std::multimap const & graph) +template static bool +is_ancestor(T const & ancestor_id, + T const & descendent_id, + std::multimap const & graph) { - std::set visited; - std::queue queue; + std::set visited; + std::queue queue; queue.push(ancestor_id); while (!queue.empty()) { - revision_id current_id = queue.front(); + T current_id = queue.front(); queue.pop(); if (current_id == descendent_id) return true; else { - typedef std::multimap::const_iterator gi; + typedef typename std::multimap::const_iterator gi; std::pair children = graph.equal_range(current_id); for (gi i = children.first; i != children.second; ++i) { @@ -574,7 +574,7 @@ void add_node_ancestry(u64 child, u64 parent); void write_certs(); - void kluge_for_3_ancestor_nodes(); + void kluge_for_bogus_merge_edges(); void rebuild_ancestry(); void get_node_manifest(u64 node, manifest_id & man); u64 add_node_for_old_manifest(manifest_id const & man); @@ -641,93 +641,88 @@ } void -anc_graph::kluge_for_3_ancestor_nodes() +anc_graph::kluge_for_bogus_merge_edges() { - // This method is, as the name suggests, a kluge. It exists because in the - // 0.17 timeframe, monotone's ancestry graph has several nodes with 3 - // parents. This isn't, in principle, necessarily a bad thing; having 3 - // parents is reasonably well defined, I don't know of much code that is - // dependent on revisions having only 2 parents, etc. But it is a very - // weird thing, that we would never under any circumstances create today, - // and it only exists as a side-effect of the pre-changeset days. In the - // future we may decide to allow 3+-parent revisions; we may decide to - // disallow it. Right now, I'd rather keep options open. - // We remove only edges that are "redundant" (i.e., already weird...). - // These are also something that we currently refuse to produce -- when a - // node has more than one parent, and one parent is an ancestor of another. - // These edges, again, only exist because of weirdnesses in the - // pre-changeset days, and are not particularly meaningful. Again, we may - // decide in the future to use them for some things, but... - // FIXME: remove this method eventually, since it is (mildly) destructive on - // history, and isn't really doing anything that necessarily needs to happen - // anyway. - P(F("scanning for nodes with 3+ parents\n")); - std::set manyparents; + // This kluge exists because in the 0.24-era monotone databases, several + // bad merges still existed in which one side of the merge is an ancestor + // of the other side of the merge. In other words, graphs which look like + // this: + // + // a ----------------------> e + // \ / + // \---> b -> c -> d ----/ + // + // Such merges confuse the roster-building algorithm, because they should + // never have occurred in the first place: a was not a head at the time + // of the merge, e should simply have been considered an extension of d. + // + // So... we drop the a->e edges entirely. + // + // Note: this kluge drops edges which are a struct superset of those + // dropped by a previous kluge ("3-ancestor") so we have removed that + // code. + + P(F("scanning for bogus merge edges\n")); + + std::multimap parent_to_child_map; + for (std::multimap::const_iterator i = ancestry.begin(); + i != ancestry.end(); ++i) + parent_to_child_map.insert(std::make_pair(i->second, i->first)); + + std::map edges_to_kill; for (std::multimap::const_iterator i = ancestry.begin(); i != ancestry.end(); ++i) { - if (ancestry.count(i->first) > 2) - manyparents.insert(i->first); - } - for (std::set::const_iterator i = manyparents.begin(); - i != manyparents.end(); ++i) - { - std::set indirect_ancestors; - std::set parents; - std::vector to_examine; - for (std::multimap::const_iterator j = ancestry.lower_bound(*i); - j != ancestry.upper_bound(*i); ++j) + std::multimap::const_iterator j = i; + ++j; + u64 child = i->first; + // NB: ancestry is a multimap from child->parent(s) + if (j != ancestry.end()) { - to_examine.push_back(j->second); - parents.insert(j->second); - } - I(!to_examine.empty()); - while (!to_examine.empty()) - { - u64 current = to_examine.back(); - to_examine.pop_back(); - for (std::multimap::const_iterator j = ancestry.lower_bound(current); - j != ancestry.upper_bound(current); ++j) + if (j->first == i->first) { - if (indirect_ancestors.find(j->second) == indirect_ancestors.end()) - { - to_examine.push_back(j->second); - indirect_ancestors.insert(j->second); - } + L(F("considering old merge edge %s\n") % + safe_get(node_to_old_rev, i->first)); + u64 parent1 = i->second; + u64 parent2 = j->second; + if (is_ancestor (parent1, parent2, parent_to_child_map)) + safe_insert(edges_to_kill, std::make_pair(child, parent1)); + else if (is_ancestor (parent2, parent1, parent_to_child_map)) + safe_insert(edges_to_kill, std::make_pair(child, parent2)); } } - size_t killed = 0; - for (std::set::const_iterator p = parents.begin(); - p != parents.end(); ++p) + } + + for (std::map::const_iterator i = edges_to_kill.begin(); + i != edges_to_kill.end(); ++i) + { + u64 child = i->first; + u64 parent = i->second; + bool killed = false; + for (std::multimap::iterator j = ancestry.lower_bound(child); + j->first == child; ++j) { - if (indirect_ancestors.find(*p) != indirect_ancestors.end()) + if (j->second == parent) { - P(F("optimizing out redundant edge %i -> %i\n") % (*p) % (*i)); - // sometimes I hate STL. or at least my incompetence at using it. - size_t old_size = ancestry.size(); - for (std::multimap::iterator e = ancestry.lower_bound(*i); - e != ancestry.upper_bound(*i); ++e) - { - I(e->first == *i); - if (e->second == *p) - { - ancestry.erase(e); - break; - } - } - I(old_size - 1 == ancestry.size()); - ++killed; + P(F("optimizing out redundant edge %d -> %d\n") + % parent % child); + ancestry.erase(j); + killed = true; + break; } } - I(killed < parents.size()); - I(ancestry.find(*i) != ancestry.end()); + + if (!killed) + W(F("failed to eliminate edge %d -> %d\n") + % parent % child); } } + void anc_graph::rebuild_ancestry() { - kluge_for_3_ancestor_nodes(); + kluge_for_bogus_merge_edges(); P(F("rebuilding %d nodes\n") % max_node); { @@ -1140,6 +1135,10 @@ MM(dbg); work.pop_front(); + + if (done.find(child) != done.end()) + continue; + std::pair parent_range = ancestry.equal_range(child); std::set parents; bool parents_all_done = true; @@ -1313,7 +1312,8 @@ { if (i->first != child) continue; - work.push_back(i->second); + if (done.find(i->second) == done.end()) + work.push_back(i->second); } } } ============================================================ --- work.cc b9dfdced17ced6f9849d0a7353dd9af4556c8d74 +++ work.cc 7c3492b6c9f41728f0f70bf75f77e447ef21c37c @@ -709,11 +709,9 @@ bookkeeping_path pth = path_for_nid(nid); require_path_is_nonexistent(pth, F("path %s already exists") % pth); - file_data dat; - source.get_file_content(content, dat); - // FIXME_ROSTERS: what about charset conversion etc.? - write_data(pth, dat.inner()); safe_insert(written_content, make_pair(pth, content)); + // Defer actual write to moment of attachment, when we know the path + // and can thus determine encoding / linesep convention. return nid; } @@ -722,22 +720,39 @@ { bookkeeping_path src_pth = path_for_nid(nid); file_path dst_pth(dst); + + // Possibly just write data out into the working copy, if we're doing + // a file-create (not a dir-create or file/dir rename). + if (!file_exists(src_pth)) + { + std::map::const_iterator i + = written_content.find(src_pth); + if (i != written_content.end()) + { + if (file_exists(dst_pth)) + { + file_id dst_id; + ident_existing_file(dst_pth, dst_id, app.lua); + if (i->second == dst_id) + return; + } + file_data dat; + source.get_file_content(i->second, dat); + write_localized_data(dst_pth, dat.inner(), app.lua); + return; + } + } + + // If we get here, we're doing a file/dir rename, or a dir-create. switch (get_path_status(src_pth)) { case path::nonexistent: I(false); break; case path::file: - if (file_exists(dst_pth)) - { - std::map::const_iterator i - = written_content.find(src_pth); - I(i != written_content.end()); - file_id dst_id; - ident_existing_file(dst_pth, dst_id, app.lua); - if (i->second == dst_id) - return; - } + E(!file_exists(dst_pth), + F("renaming '%s' onto existing file: '%s'\n") + % src_pth % dst_pth); break; case path::directory: if (directory_exists(dst_pth))