#
# add_file "TODO"
#
# patch "TODO"
#  from []
#  to [cf0d5dc2493d53cf6ae94759cb0da5639de3e8cb]
#
# patch "dumb.py"
#  from [86cc3fff44c25f7d941346c8b0ab44259d69b20d]
#  to [b520c982593c8711ef22e447b7084ee0a7ea7171]
#
# patch "merkle_dir.py"
#  from [fea4ab58f66ad1998e570b0ed5b0615f2845a068]
#  to [d4e89ce8e256741fd064a9694d06026e341c5b84]
#
========================================================================
--- TODO
+++ TODO	cf0d5dc2493d53cf6ae94759cb0da5639de3e8cb
@@ -0,0 +1,9 @@
+* packet commands -> automate?
+* merkle dir stuff that doesn't require loading entire chunks into
+  memory all the time
+* ftp/sftp/http
+* possibly better rollback stuff?
+  - e.g., truncate when possible
+  - include some info in the lockdir on who has things locked, to aid
+    in detecting staleness of locks
+* file reader needs to do the pause/retry thing on missing files
========================================================================
--- dumb.py	86cc3fff44c25f7d941346c8b0ab44259d69b20d
+++ dumb.py	b520c982593c8711ef22e447b7084ee0a7ea7171
@@ -1,133 +1,9 @@
 import sha
 from sets import Set
 import os
 import os.path
 from cStringIO import StringIO
-# What's on disk:
-# We write all interesting data out to a file DATA
-# It consists of a bunch of "chunks", appended one after another
-# each chunk is a deflated (not gzipped) block
-# after decompression, each such chunk can be fed to 'monotone read'
-# each chunk has a unique id.  it is assumed that the id<->data mapping is
-# immutable; in particular, when transferring, we assume that if we have
-# an id already, then we already have the corresponding data
-# a file INDEX is maintained which contains table-of-contents information
-# for DATA -- each line in INDEX gives metadata for the next chunk in
-# DATA.  this metadata is: the associated id, offset in DATA, number of bytes
-# taken up in DATA.  This allows, for instance, random access into DATA.
-# the INDEX file is used entirely locally; we never look at it over the
-# network.
-# locally, the INDEX file is used to generate the HASHES files
-# the HASHES files implement the merkle trie algorithm
-# we chop off our merkle tree at depth 2; this should give good loading for
-# even very large projects, and is not so large that we pay excessive
-# overhead on smaller projects
-# this means there are two kinds of HASHES files -- the root file
-# ("HASHES_"), and the child files ("HASHES_<2 digits of hex>")
-# both kinds of HASHES_ files are deflated as a whole
-# the child files contain a bunch of lines of the form:
-#   chunk <id> <offset> <length>
-# where <id> begins with the 2 hex digits that this file is responsible
-# for, and <offset> and <length> are the location of the corresponding
-# data in DATA.  These files are sorted lexicographically by id.
-# the root file contains a bunch of lines of the form:
-#   subtree <prefix> <filename> <hash>
-# where <prefix> is the prefix of the tree being referred to (always two
-# digits), <filename> is the child file containing the corresponding
-# information, and <hash> is the sha1 of the string
-# "<id1>\0<id2>\0...<idn>\0", for all ids mentioned in <filename>.
-# This file is always sorted lexicographically by prefix.
-# (FIXME: should we always generate child files for all prefixes, even if
-# some are unused?)
-# finally, there's a file VERSION, which contains the sha1 of the string
-# "<id1>\0<id2>\0...<idn>\0", plus a newline.
-# finally2, there's a file called LENGTH, which contains the size of
-# the file DATA in bytes, and then the size of the file INDEX in bytes.
-
-# sometimes there is a file _rollback, which is a copy of LENGTH made before
-# starting a transaction.  the way this works is that if a transaction gets
-# uncleanly aborted (which we try to make sure doesn't happen, because it is
-# icky), then we use this file to do rollback.  rollback can only be done
-# locally -- the way it works is that we truncate DATA and INDEX to the
-# lengths given in _rollback, replace LENGTH by _rollback, and regenerate all
-# hashes from scratch.
-# actually, SFTP supports truncate (paramiko doesn't expose it, but all you
-# have to do is create a SFTPAttributes object, set its size field, and send a
-# setstat request on it -- see implementation of paramiko's chmod, for
-# similar).  which means that it would be better to not have to regenerate all
-# hash files from scratch.  various options... move them out of the way, don't
-# delete them until everything is committed, perhaps.
-#
-
-# hmm... even if we don't have truncate, we should be able to restore original
-# hash files, at which point, we have random junk in DATA and INDEX but it
-# doesn't matter too much... INDEX will become corrupted, of course.
-
-# How a pull works:
-#   -- pull VERSION, see if it matches ours.  (note down what it says for
-#      later.)
-#   -- if not, pull root hashes file.  compare each line to our hashes.  for
-#      each one that differs, fetch the child file.
-#   -- compare fetched child files to our child files.  For each line that's
-#      new in the fetched files, note down the id/offset/length in a list.
-#   -- after we have processed _all_ child files, use the list to figure out
-#      which parts of the DATA file to fetch.  coalesce adjacent sections at
-#      this stage.  (but keep track of how everything matches back up to ids.)
-#   -- do one or more byte-range fetches to grab the stuff we need
-#   -- append the results to our DATA
-#   -- add the new stuff to INDEX, with the appropriate offsets for our DATA
-#      file
-#   -- rehash
-#   (-- pull VERSION again, see if it's the same as it was at the beginning.
-#      If not, warn the user or just print a message and go back to step 1.)
-# How a push works:
-#   -- lock the remote copy (see below)
-#   -- pull VERSION/pull HASHES_/pull any child hash files we need
-#   -- save HASHES_ and the child hash files so we can update them!
-#   -- before we write anything, replace remote VERSION with a random nonce
-#      (so that reads can reliably detect concurrent updates)
-#   -- for each id we have that are not mentioned in the appropriate child
-#      hash file, pull out the appropriate chunk from our DATA, assemble these
-#      pieces together, and append them to the remote DATA file
-#      also append appropriate lines to the remote INDEX
-#   -- _update_ atomically each child hash file we pulled to include the stuff
-#      we appended (do _not_ just push our version, remote side may have stuff
-#      we don't, and everything may be at different offsets)
-#   -- after child hash files have all been written, update root hash file
-#      similarly
-#   -- finally update VERSION
-
-## FIXME: I guess INDEX should just contain lengths, so that it can be updated
-## when pushing (without having to know the length of the remote DATA file)
-#### No -- still need to update child hash files, which do have to know
-#### offset.  So add a file LENGTH_OF_DATA or something, that we read after we
-#### get our edit lock, and update after we finished appending.
-
-# remote operations:
-#   -- synchronous append
-#   -- atomic update -- can queue multiple, block on all completing
-#   -- fetch file (sync and async)
-#   -- fetch byte ranges (sync, but perhaps should be able to pipeline
-#      fetching multiple different ranges?)
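
(The "coalesce adjacent sections" step in the pull sketch above is the only
mildly fiddly part; below is a minimal illustrative sketch in Python, assuming
each wanted chunk is an (id, offset, length) triple as recorded in INDEX.
The helper name and signature are illustrative, not taken from dumb.py.)

# Sketch: merge byte ranges that are adjacent in DATA, so a pull can issue
# a few large range requests instead of many small ones.
def coalesce_ranges(chunks):
    # chunks: list of (id, offset, length) triples wanted from the remote DATA
    merged = []
    for id, offset, length in sorted(chunks, key=lambda c: c[1]):
        if merged and merged[-1][0] + merged[-1][1] == offset:
            start, total, ids = merged[-1]
            merged[-1] = (start, total + length, ids + [id])
        else:
            merged.append((offset, length, [id]))
    # each entry is (offset, total_length, [ids satisfied by this range]),
    # which keeps track of how the fetched bytes match back up to ids
    return merged
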
-
-# so: "append and then return"
-#     "fetch all of these files and then return"
-#     "atomically replace all of these files and then return"
-#     "fetch this list of byte ranges from a given file and then return"
-#     "query if this file exists and then return" (may just attempt fetching
-#      it and see what happens)
-#     "delete file"
-
-# also write a file called _lock_info or something containing info on who
-# created the lock, to ease in cleaning things up.
-#
-# there is no atomic replace in sftp (probably not ftp either).  can write,
-# delete, rename, to minimize window.  but clients should be prepared to retry
-# if a file fetch fails.
-
-# TODO:
-#   -- cat revision and packet commands -> automate?
-
 
 def do_import(monotone, dir):
     monotone.ensure_db()
     md = MerkleDir(dir)
========================================================================
--- merkle_dir.py	fea4ab58f66ad1998e570b0ed5b0615f2845a068
+++ merkle_dir.py	d4e89ce8e256741fd064a9694d06026e341c5b84
@@ -1,8 +1,84 @@
 import sha
 import os
 import os.path
 import zlib
 
+# What's on disk:
+# We write all interesting data out to a file DATA
+# It consists of a bunch of "chunks", appended one after another
+# each chunk is a raw block of bytes, as passed to add()
+# each chunk has a unique id.  it is assumed that the id<->data mapping is
+# immutable; in particular, when transferring, we assume that if we have
+# an id already, then we already have the corresponding data
+# ids should probably be hashes of something or the other; they don't
+# actually have to be, but we do rely on them "looking like" hex encoded
+# hashes (e.g., first 2 characters are randomly distributed, doesn't
+# contain spaces, etc.)
+# the HASHES files implement the merkle trie algorithm
+# we chop off our merkle tree at depth 2; this should give good loading for
+# even very large projects, and is not so large that we pay excessive
+# overhead on smaller projects
+# this means there are two kinds of HASHES files -- the root file
+# ("HASHES_"), and the child files ("HASHES_<2 digits of hex>")
+# both kinds of HASHES_ files are deflated (not gzipped) as a whole
+# both sorts of files are sorted asciibetically
+# the child files contain a bunch of lines of the form:
+#   chunk <id> <offset> <length>
+# where <id> begins with the 2 hex digits that this file is responsible
+# for, and <offset> and <length> are the location of the corresponding
+# data in DATA.
+# the root file contains a bunch of lines of the form:
+#   subtree <prefix> <filename> <hash>
+# where <prefix> is the prefix of the tree being referred to (always two
+# digits), <filename> is the child file containing the corresponding
+# information, and <hash> is the sha1 of the string
+# "<id1>\0<id2>\0...<idn>\0", for all ids mentioned in <filename>.
+#
+# Finally, there may be a directory "_lock".  This is used for locking; see
+# below.
+#
+# Transaction handling:
+#   Readers pay no attention to transactions or writers.  Writers assume the
+#   full burden of keeping the repo fully consistent at all times.
+#   Writers:
+#     -- first acquire a lock by creating the directory _lock.  This ensures
+#        that only one writer will run at a time.
+#     -- append everything they want to append to DATA.  Until this part of
+#        the file is referenced by HASHES files, it won't be noticed.
+#        Before appending any particular item, they read the child hash file
+#        that would reference it, and make sure that it does not already
+#        exist.
+#        If interrupted at this point, unreferenced garbage may be left in
+#        DATA, but this is harmless.
+#     -- start atomically replacing child hash files.
+#        During this phase, the root hash file will lag behind the child hash
+#        files -- it will describe them as containing less than they actually
+#        do.  This does not cause any problems, because
+#          a) when determining whether an id exists, we always read the child
+#             hash file (as noted in the previous step)
+#          b) the only things that can happen to readers are that they fetch
+#             a file that is _even newer_ than they were expecting (in which
+#             case, they actually wanted it _even more_ than they realized),
+#             or that they skip a file that has been replaced, but was
+#             uninteresting before being replaced.
+#        This does mean that a pull is not guaranteed to fetch everything that
+#        was included in the most recent push; it may instead fetch only some
+#        random subset.  Users of this library must be robust against this
+#        possibility -- even if two items A and B were added at the same time,
+#        a client may receive only A, or only B.
+#
+#        In some situations (FTP, NTFS, ...) it is actually impossible to
+#        atomically replace a file.  In these cases we simply bite the bullet
+#        and have a very small race condition, while each file is being
+#        swapped around.  If readers try to open a file but find it does not
+#        exist, they should try again after a short pause, before giving up.
+#     -- atomically replace the root hash file (subject again to the above
+#        proviso)
+#     -- remove the lockdir
+#   Rollback:
+#     If a connection is interrupted uncleanly, or there is a stale lock, we:
+#     -- check for any missing files left by non-atomic renames
+#     -- remove the lockdir
+
 class HashFile:
     prefix = ""
     values = ()
@@ -45,6 +121,7 @@
         for key, values in self:
             value_txt = " ".join([str(v) for v in values])
             lines.append("%s %s %s" % (self.prefix, key, value_txt))
+        lines.sort()
         return zlib.compress("\n".join(lines))
 
     # yields (key, values)
@@ -206,6 +283,7 @@
         self._ids_to_flush = []
 
     #### Adding new items
+    # can only be called from inside a transaction.
     def add(self, id, data):
         self._need_lock()
         assert None not in (self._data_handle,
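
(The reader-side pause/retry behaviour is still only a TODO item above; a
minimal sketch of the idea, together with the lockdir acquisition described
in the merkle_dir.py comments, might look like the following.  This assumes a
plain local-filesystem backend, and the function names and signatures are
illustrative, not taken from merkle_dir.py.)

import os
import time

# Sketch: a writer takes the lock by creating the _lock directory; mkdir is
# atomic and fails if the directory already exists, so at most one writer
# can hold the lock at a time.
def acquire_lock(merkle_dir, attempts=10, pause=1.0):
    lockdir = os.path.join(merkle_dir, "_lock")
    for i in range(attempts):
        try:
            os.mkdir(lockdir)
            return lockdir
        except OSError:
            # someone else holds the lock (or it is stale); wait and retry
            time.sleep(pause)
    raise RuntimeError("could not lock %s" % merkle_dir)

# Sketch: a reader re-tries opening a file that may be momentarily missing
# while a writer does its non-atomic delete/rename replacement of a hash file.
def open_with_retry(path, attempts=5, pause=1.0):
    for i in range(attempts):
        try:
            return open(path, "rb")
        except IOError:
            if i == attempts - 1:
                raise
            time.sleep(pause)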