#
# add_dir ""
#
# add_file "drop_caches.c"
#  content [1f22e3a53336cca84fea90131a203b22db7e7a98]
#
# add_file "speedtest.py"
#  content [bb4c94f9565f26aa6aeedf27477632185eba505b]
--- drop_caches.c
+++ drop_caches.c
@@ -0,0 +1,75 @@
+/*
+ * A little program that attempts to clear caches wherever it finds itself.
+ * This is in C in order that it can be setuid.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+/*
+ * Write "3" to /proc/sys/vm/drop_caches -- see http://linux-mm.org/Drop_Caches
+ * for details.  Returns 1 on success, 0 (after printing the reason) on failure.
+ */
+int
+try_linux26()
+{
+    printf("Trying /proc/sys/vm/drop_caches...\n");
+    FILE *f = fopen("/proc/sys/vm/drop_caches", "w");
+    if (!f)
+    {
+        printf("Error opening /proc/sys/vm/drop_caches: %s\n", strerror(errno));
+        return 0;
+    }
+    /* the write is buffered, so a kernel-side rejection only surfaces at fclose() */
+    if (fprintf(f, "3\n") < 0 || fclose(f) != 0)
+    {
+        printf("Error writing to file\n");
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Try just allocating and touching a big pile of memory -- not the most
+ * reliable method, but the most portable.
+ *
+ * On my linux 2.6.15 box, anyway, this never actually gets an error from
+ * malloc(); instead, it just triggers the OOM killer, which might in
+ * principle decide to kill some other process entirely.  Not a reliable
+ * method!  It's also slow.
+ *
+ * Only enable this method if you have read the above and decide you want to
+ * use it anyway.  Returns 1 once memory ran out, 0 while it is disabled.
+ */
+int
+try_generic()
+{
+    /* disabled -- see above.  Report failure: nothing was actually dropped. */
+    return 0;
+
+    printf("Trying to allocate infinite memory...\n");
+    size_t allocated = 0;
+    size_t blocksize = (1024 * 1024);
+    while (1)
+    {
+        void *data = malloc(blocksize);
+        if (!data)
+        {
+            printf("Ran out of memory after %zu bytes.\n", allocated);
+            return 1;
+        }
+        allocated += blocksize;
+        memset(data, 42, blocksize);
+    }
+}
+
+int main(int argc, char** argv)
+{
+    /* Try each method in turn; exit 0 as soon as one succeeds, else exit 1. */
+    (void) argc;
+    (void) argv;
+    int cleared = try_linux26() || try_generic();
+    return cleared ? 0 : 1;
+}
--- speedtest.py
+++ speedtest.py
@@ -0,0 +1,362 @@
+import random
+import os
+import os.path
+import subprocess
+import shutil
+import time
+import csv
+
+def ensure_dir(path):
+    if not os.path.exists(path):  # anything already at path (even a file) is left alone
+        os.makedirs(path)  # creates missing parent directories too
+
+def clear_caches():
+    pass  # placeholder: no cache clearing is actually performed yet
+
+class Process(object):
+    """A command run in the background as the leader of its own process group.
+
+    The preexec_fn hack is what lets kill() reach everything the command
+    spawned: for 'time mtn serve' we could otherwise only kill 'time' (!).
+    """
+    def __init__(self, cmd):
+        self.end_hook = None
+        self.popen = subprocess.Popen(cmd, preexec_fn=os.setsid)
+
+    def kill(self):
+        # Negative pid => signal the whole group.  SIGINT (2), not SIGTERM:
+        # time(1) gives no output on SIGTERM, but for SIGINT it does.
+        os.kill(-self.popen.pid, 2)
+        return self.wait()
+
+    def wait(self):
+        """Wait for exit, run the hook, return (returncode, stdout, stderr)."""
+        stdout, stderr = self.popen.communicate()
+        result = (self.popen.wait(), stdout, stderr)
+        if self.end_hook is not None:
+            self.end_hook(result)
+        return result
+
+    def hook(self, hook):
+        self.end_hook = hook  # wait() calls it with the result tuple
+
+    def __del__(self):
+        # Last resort: SIGKILL (9) the group; popen is unset if Popen() raised.
+        if getattr(self, "popen", None) is not None and self.popen.poll() is None:
+            os.kill(-self.popen.pid, 9)
+
+class InstrumenterFactory(object):
+    """Pairs an instrumenter class with a repeat count."""
+    def __init__(self, klass, repeats=1):
+        self.klass, self.repeats = klass, repeats
+
+    def make(self, record_dir):
+        return self.klass(record_dir)  # a new instance bound to record_dir
+
+class Instrumenter(object):
+    def __init__(self, record_dir):
+        """Base instrumenter: runs commands, records nothing (record_dir unused)."""
+
+    def run(self, name, cmd):
+        """Run cmd (the executable plus its args) to completion, using 'name'
+        to record instrumented results under.  Built on top of run_bg."""
+        background = self.run_bg(name, cmd)
+        return background.wait()
+
+    def run_bg(self, name, cmd):
+        """Like run, but return immediately with an object the caller should
+        later call 'kill' or 'wait' on.  Override only this to change behavior."""
+        return Process(cmd)
+
+    def record_stat(self, name, value):
+        """Ignored by the base class."""
+
+    def record_file(self, name, path):
+        """Ignored by the base class."""
+
+    def flush(self):
+        """Nothing to write out in the base class."""
+
+class RecordingInstrumenter(Instrumenter):
+    def __init__(self, record_dir):
+        self.dir = record_dir  # recorded files and stats.csv end up in here
+        ensure_dir(self.dir)
+        self.stats = {}  # stat name -> list of values, in recording order
+
+    def record_stat(self, name, value):
+        self.stats.setdefault(name, []).append(value)
+
+    def flush(self):
+        """Rewrite stats.csv: one row per stat (its name, then its values), sorted."""
+        f = open(os.path.join(self.dir, "stats.csv"), "w")
+        try:
+            rows = [[key] + values for key, values in sorted(self.stats.items())]
+            csv.writer(f).writerows(rows)
+        finally:
+            f.close()  # don't leak the handle if a row fails to serialize
+
+    def record_file(self, name, path):
+        target_path = os.path.join(self.dir, name)
+        if os.path.exists(target_path):
+            raise KeyError(name)  # refuse to overwrite an earlier recording
+        if os.path.isdir(path):
+            shutil.copytree(path, target_path)
+        else:
+            shutil.copy2(path, target_path)  # plain file: copied with its metadata
+
+class TimingInstrumenter(RecordingInstrumenter):
+    def parse_time_str(self, s):
+        # 1.7 -> 1.7;  1:20.3 -> 80.3;  1:02:03.5 -> 3723.5  (%E is [h:]m:s)
+        seconds = 0.0
+        for field in s.split(":"):
+            seconds = 60 * seconds + float(field)
+        return seconds
+
+    def run_bg(self, name, cmd):
+        # We put a noticeable string "DATA" at the start of the format, so that
+        # we can find it even if time(1) decides to print other garbage, like
+        # "Command exited with non-zero status 1".
+        timing_path = os.path.abspath("timings-" + name)  # survives a later chdir
+        my_cmd = ["time", "-f", "DATA: %U %S %E", "-o", timing_path] + cmd
+        def timing_hook(result):
+            timing_file = open(timing_path, "r")
+            try:
+                data = [line for line in timing_file if line.startswith("DATA")]
+            finally:
+                timing_file.close()
+            assert data, "no DATA line in " + timing_path
+            junk, user, system, wall = data[0].split()
+            self.record_stat(name + "-user-time", self.parse_time_str(user))
+            self.record_stat(name + "-system-time", self.parse_time_str(system))
+            self.record_stat(name + "-wall-time", self.parse_time_str(wall))
+        process = super(TimingInstrumenter, self).run_bg(name, my_cmd)
+        process.hook(timing_hook)
+        return process
+
+
+class Benchmark(object):
+    def setup(self, testable):
+        """Set up the current directory in some useful way.
+
+        Called from a special directory, which run() will eventually be
+        called in.
+        """
+
+    def run(self, testable):
+        """Called in the directory setup() previously ran in (or a copy of it).
+        The testable may or may not be identical to the one setup() saw."""
+
+class TestableFactory(object):
+    def new(self, instrumenter):  # builds one fresh Testable per call
+        return Testable(instrumenter)
+
+class Testable(object):
+    def __init__(self, instrumenter=None):
+        self.instrumenter = instrumenter  # TestableFactory.new() passes one in
+    def setup(self):
+        """Like Benchmark.setup, but for anything the VCS itself wants to do."""
+    def pull(self, name, repo, instrumenter):
+        """No-op in this base class."""
+
+class ExistingRepo(object):
+    def __init__(self, path):
+        self.path = os.path.abspath(path)  # stays valid whatever the cwd becomes
+
+    def setup(self):
+        """Nothing to prepare: the repository is taken as it is."""
+
+    def repo_path(self):
+        return self.path
+
+# each benchmark has a setup and a run
+# both are done in the same directory
+# and a directory to drop auxiliary results in
+
+# copying stuff to server vs. client...
+
+#FreshPull(db=ExistingDB("..."))
+#FreshPull(db=FreshPull(ExistingDB("...")))
+#FreshPull(db=IncrPull(RandomDB(tree_depth=4, tree_width=10, edit_rate=3, edit_density=0.5)))
+
+# main driver:
+#   -- needs a scratch dir
+#   -- needs a results dir, which will end up with, for each
+#      program-under-test, a subdir with a .csv file plus extra subdirs for
+#      any auxiliary info
+#   -- needs a list of program-under-tests
+#   -- needs a list of benchmarks to run
+#   -- needs to know how many times to run each test
+#   -- whether to leave scratch dirs behind
+#
+# what it does:
+#   arranges for each test to be setup (with instrumentation turned off), and
+#   then run (multiple times, with instrumentation turned on, and each time in
+#   a clean dir, generally killing that dir after each run to save space)
+#   and arranges for instrumentation to go to the right place
+
+# for each test:
+#   for each program under test:
+#     do the setup
+#   for 1..number of runs:
+#     for each program under test, in random order:
+#       set up a run dir
+#       clear caches
+#       run the test
+#       (remove the run dir, if desired)
+#   get the instrumentation to combine multiple runs together, however it
+#     wants to do that...
+
+class MtnFactory(object):
+    def __init__(self, path):
+        self.path = path  # handed unchanged to every Mtn this factory creates
+
+    def new(self, instrumenter):
+        return Mtn(self.path, instrumenter)
+
+class Mtn(object):
+    def __init__(self, path, instrumenter):
+        self.path = path  # the file setup() copies in as mtn-server/mtn-client/mtn
+        self.instrumenter = instrumenter  # every command is launched through it
+
+    def setup(self):
+        shutil.copy(self.path, "mtn-server")
+        shutil.copy(self.path, "mtn-client")
+        shutil.copy(self.path, "mtn")
+        f = open("server-perms.lua", "w")
+        f.write("""
+        function get_netsync_read_permitted(pattern, identity)
+            return true
+        end
+        function get_netsync_write_permitted(identity)
+            return true
+        end
+        function get_passphrase(keyid)
+            return keyid
+        end
+        """)
+        f.close()
+        os.mkdir("keys")  # passed to the server as --keydir in pull()
+        f = open("keys/address@hidden", "w")
+        f.write("""[keypair address@hidden
+MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC6Pz+IvvOCDDqzN9WFO/zOjL7s9dVCS+zn
+s/L9jQ2kHfNWXFof4GcgmMu4DfU4sUrRz39QxDlUrxEOvmIc9z3DNuIcbFZx7UZg9DWfdmDm
+vbW79bZlVMeIudAIUwa6euX163AK5hacmqJfuB5U7awQM9o3rn8JYULleAoz5QTtawIDAQAB#
+MIICyTBDBgkqhkiG9w0BBQ0wNjAeBgkqhkiG9w0BBQwwEQQIvYSV8ucj9m4CAggAAgEYMBQG
+CCqGSIb3DQMHBAg/BZPM2O3QfASCAoBBGkVz4E/Pr1CsIioC92eCz4qWLclhc53HgHSCEo9I
+XdNCTpCs/oxOXhQ0WQCPFhYEaxU8STgZm0Yhq8WEF1QfxOPOU8nDiwMT0L7/ARruu5bTCxnW
+B3kkn+XiO5GldVJhULFlrl91t83yMsTSw+vyCyxZkqewBLR7mqHQUe2suVquMyutxxr2vZgV
+QMfRxk65fSvySUHeNaj1dmakYcpP+35iejyUTAtAGuBsv2C68bwif4wkpLpedghNCtmccSdQ
+t9QDF3yy6Q42tAW/OK6/t836/qn39f+47Kp4LMJUMmxNrtV7IntIkgBGgnGsqP9Br2B4GYXc
+sWK0YApA3+Sf3kfH/wQ6Hib8nN4YxUTxxnS9WNHvRFrXCmfbGd5vAzi4lKCm/W+2Nlpd4DDQ
+3JZjjCR73PMfKtHJCGULkNkK/9kRyhLYql2u/ZUJoEcdZxzEpYgExW8Wu1CrCVtWd+ueXs1h
+or6Fdua7Gg4cjMgVg6EUSxdMBFQCX8heD8JeG6jMFNR9hTxe8o/PK8Ys63JyLMLRUv3Ud+f8
+8T0TtCZV5+rgLfvb6k89uDJJK228WuJB6rp8S+qqq30RFPmkzW8JNulRilY+wrIfcowA6+TA
+T5WKzFOIbkZd/R34tNLJMjTJlUq6SQKaOlQnqOEFbyY/GXgzYgnmc3tl8pigXEJvNzU5GiuB
+ib35QQbzh87KlfLtWELK+8ZoyhZAZAMr97IavUbuFubOyEoEozUliARyRZ1ZudM4Ii+J6TRX
+cmLryIBlz3OXgUUBSwJPwtWuR4tZ8nIt7cVJr7pxLblGfeFuu01HWN55hv4C78/aNSipVYCF
+OFt8n7wQHxbbJvoTIdd/
+[end]""")
+        f.close()
+
+    def pull(self, source, target):
+        server = self.instrumenter.run_bg("server",
+                                          ["./mtn-server",
+                                           "--rcfile", "server-perms.lua",
+                                           "--keydir", "keys", "-d", source,
+                                           "--bind=localhost:12345",
+                                           "serve", "*"])
+        try:
+            time.sleep(3)  # fixed pause before the client connects to the server
+            self.instrumenter.run("pull",
+                                  ["./mtn-client", "-d", target, "pull", "localhost:12345", "*"])
+        finally:
+            server.kill()  # even if the pull raised: don't leave port 12345 taken
+
+    def init_repo(self, repo):
+        self.instrumenter.run("init_repo",
+                              ["./mtn", "db", "init", "-d", repo])
+
+class PullBenchmark(object):
+    def __init__(self, repo_source):
+        self.repo_source = repo_source  # must offer setup() and repo_path()
+
+    def setup(self, vcs):
+        vcs.init_repo("target")  # the repository that run() later pulls into
+        self.repo_source.setup()
+
+    def run(self, vcs):
+        vcs.pull(self.repo_source.repo_path(), "target")
+
+
+class Driver(object):
+    def __init__(self, scratch, results, testables, benchmarks, instrumenters, debug):
+        self.scratch = os.path.abspath(scratch)  # absolute: run() chdir()s around
+        self.results = os.path.abspath(results)
+        self.testables = testables
+        self.benchmarks = benchmarks
+        self.instrumenters = instrumenters  # not consulted yet; see FIXME in run()
+        self.debug = debug  # true => leave the scratch directories behind
+
+    def run(self):
+        """Set up every benchmark for every testable, then run each one repeatedly."""
+        startdir = os.getcwd()
+        # FIXME: now make it actually use the self.instrumenters variable...
+        # also, give things names -- I guess by turning these things into
+        #   dicts
+        # also, figure out some instance/class/factory naming scheme that
+        #   makes sense
+        instrumenter_factory = InstrumenterFactory(TimingInstrumenter, 2)
+        try:
+            for index, benchmark in enumerate(self.benchmarks):
+                i = index + 1  # directories are numbered from 1
+                benchdir = os.path.join(self.scratch, str(i))
+                for j in range(len(self.testables)):
+                    testable = self.testables[j].new(Instrumenter(""))  # setup is not measured
+                    setupdir = os.path.join(benchdir, "setup-" + str(j))
+                    ensure_dir(setupdir)
+                    os.chdir(setupdir)
+                    testable.setup()
+                    benchmark.setup(testable)
+
+                instrumenters = [
+                    instrumenter_factory.make(os.path.join(self.results, "%s-%s" % (i, j)))
+                    for j in range(len(self.testables))]
+
+                for repeat in range(instrumenter_factory.repeats):
+                    plan = list(range(len(self.testables)))
+                    random.shuffle(plan)  # fresh random order of testables each repeat
+                    for j in plan:
+                        instrumenter = instrumenters[j]
+                        testable = self.testables[j].new(instrumenter)
+                        rundir = os.path.join(benchdir, "run-%s-%s" % (j, repeat))
+                        shutil.copytree(os.path.join(benchdir, "setup-" + str(j)), rundir)
+                        os.chdir(rundir)
+                        clear_caches()
+                        benchmark.run(testable)
+                        instrumenter.flush()
+                        if not self.debug:
+                            os.chdir(startdir)  # step out before deleting the cwd
+                            shutil.rmtree(rundir)
+
+                if not self.debug:
+                    for j in range(len(self.testables)):
+                        shutil.rmtree(os.path.join(benchdir, "setup-" + str(j)))
+        finally:
+            os.chdir(startdir)  # never leave the caller inside a scratch directory
+
+def tryit():
+    """Hard-wired trial run: wipe scratch/results, then drive one pull benchmark."""
+    scratch, results = "scratch", "results"
+    for stale in (scratch, results):
+        shutil.rmtree(stale, True)  # True == ignore_errors
+
+    driver = Driver(scratch, results,
+                    testables=[MtnFactory("/home/njs/src/monotone/opt/mtn")],
+                    benchmarks=[PullBenchmark(
+                        ExistingRepo("/home/njs/src/monotone/speedtest/t.db"))],
+                    instrumenters=[InstrumenterFactory(TimingInstrumenter, 2)],
+                    debug=1)
+    driver.run()
+
+if __name__ == "__main__":
+    tryit()