# Patch metadata carried at the top of this chunk (monotone changeset header):
#
#   add_dir ""
#   add_file "drop_caches.c"
#    content [1f22e3a53336cca84fea90131a203b22db7e7a98]
#   add_file "speedtest.py"
#    content [bb4c94f9565f26aa6aeedf27477632185eba505b]
#
# ---------------------------------------------------------------------------
# drop_caches.c -- companion helper added by the same patch, kept here
# verbatim for reference.
# NOTE(review): the header names on the four #include lines were lost when
# this text was extracted; restore them before compiling.
# ---------------------------------------------------------------------------
#
#   /*
#    * A little program that attempts to clear caches where-ever it finds itself.
#    * This is in C in order that it can be setuid.
#    */
#
#   #include
#   #include
#   #include
#   #include
#
#   /*
#    * Try /proc/sys/vm/drop_caches -- see http://linux-mm.org/Drop_Caches for
#    * details.
#    */
#   int
#   try_linux26()
#   {
#     printf("Trying /proc/sys/vm/drop_caches...\n");
#     FILE *f = fopen("/proc/sys/vm/drop_caches", "w");
#     if (!f)
#       {
#         printf("Error opening /proc/sys/vm/drop_caches: %s\n", strerror(errno));
#         return 0;
#       }
#     /* we leak the fd, but who cares */
#     if (fprintf(f, "3\n") < 0)
#       {
#         printf("Error writing to file\n");
#         return 0;
#       }
#     return 1;
#   }
#
#   /*
#    * Try just allocating and touching a big pile of memory -- not the most
#    * reliable method, but the most portable.
#    *
#    * On my linux 2.6.15 box, anyway, this never actually gets a error from
#    * malloc(); instead, it just triggers the OOM killer, which might in
#    * principle decide to kill some other process entirely.  Not a reliable
#    * method!  It's also slow.
#    *
#    * Only enable this method if you have read the above and decide you want to
#    * use it anyway.
#    */
#   int
#   try_generic()
#   {
#     /* disabled -- see above.  */
#     return 1;
#
#     printf("Trying to allocate infinite memory...\n");
#     size_t allocated = 0;
#     size_t blocksize = (1024 * 1024);
#     while (1)
#       {
#         void *data = malloc(blocksize);
#         if (!data)
#           {
#             printf("Ran out of memory after %ul bytes.\n", allocated);
#             return 1;
#           }
#         allocated += blocksize;
#         memset(data, 42, blocksize);
#       }
#   }
#
#   int main(int argc, char** argv)
#   {
#     if (try_linux26())
#       return 0;
#     if (try_generic())
#       return 0;
#     return 1;
#   }
#
# ---------------------------------------------------------------------------
# speedtest.py
# ---------------------------------------------------------------------------

import random
import os
import os.path
import subprocess
import shutil
import time
import csv


def ensure_dir(path):
    """Create directory *path*, including missing parents, if it is absent.

    An already-existing directory is accepted silently, even when it appears
    between our attempt and the failure.  Any other failure (permissions, a
    regular file occupying the path, ...) is re-raised as OSError.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Only swallow the error when the directory really is there now.
        if not os.path.isdir(path):
            raise


def clear_caches():
    """Placeholder hook called before every benchmark run; does nothing yet.

    NOTE(review): presumably meant to invoke the drop_caches helper that
    ships with this patch -- confirm before relying on cold-cache timings.
    """
    pass


class Process(object):
    """A child process running as the leader of its own process group.

    We stuff the called process into its own process group using the
    preexec_fn hack.  We need to do this so that we can clean up everything
    that is spawned.  In particular, if we run 'time mtn serve', then without
    the process group hack we can only kill the 'time' process, not the
    'mtn serve' process (!).
    """

    def __init__(self, cmd):
        """Start *cmd* (an argv list) in a new session/process group."""
        # Assign the attributes first so __del__ is safe if Popen raises.
        self.popen = None
        self.end_hook = None
        self.popen = subprocess.Popen(cmd, preexec_fn=os.setsid)

    def kill(self):
        """Interrupt the whole process group, then wait for it.

        We use a negative pid to kill the group, not just the process.
        We use SIGINT, because if we send SIGTERM to time(1), it gives no
        output, but for SIGINT it does.

        Returns the same tuple as wait().
        """
        os.kill(-self.popen.pid, 2)  # SIGINT
        return self.wait()

    def wait(self):
        """Wait for the child to exit and run the end hook, if any.

        Returns a (returncode, stdout, stderr) tuple.  No pipes are
        requested from Popen, so stdout and stderr are None.
        """
        stdout, stderr = self.popen.communicate()
        returncode = self.popen.wait()
        result = (returncode, stdout, stderr)
        if self.end_hook is not None:
            self.end_hook(result)
        return result

    def hook(self, hook):
        """Register *hook*, called with the result tuple at the end of wait()."""
        self.end_hook = hook

    def __del__(self):
        popen = getattr(self, "popen", None)
        if popen is not None and popen.poll() is None:
            try:
                # Kill the whole group (negative pid), for the same reason
                # kill() does: the leader may only be a wrapper such as time(1).
                os.kill(-popen.pid, 9)  # SIGKILL
            except OSError:
                # The group vanished between poll() and kill(); a finalizer
                # must not raise, and there is nothing left to clean up.
                pass


class InstrumenterFactory(object):
    """Pairs an instrumenter class with the number of repeats to run with it."""

    def __init__(self, klass, repeats=1):
        self.klass = klass
        self.repeats = repeats

    def make(self, record_dir):
        """Return a new instance of the class, recording into *record_dir*."""
        return self.klass(record_dir)


class Instrumenter(object):
    """Do-nothing instrumenter: runs commands but records nothing."""

    def __init__(self, record_dir):
        pass

    def run(self, name, cmd):
        """Run *cmd* to completion and return its (returncode, stdout, stderr).

        Runs the executable with the given args, and uses 'name' to record
        instrumented results under.  Implemented in terms of run_bg.
        """
        return self.run_bg(name, cmd).wait()

    def run_bg(self, name, cmd):
        """Start *cmd* and return a Process immediately.

        Same as run(), but the caller should later call 'kill' or 'wait' on
        the returned object.  One need only override this to override
        behavior.
        """
        return Process(cmd)

    def record_stat(self, name, value):
        pass

    def record_file(self, name, path):
        pass

    def flush(self):
        pass


class RecordingInstrumenter(Instrumenter):
    """Instrumenter that collects stats in memory and files in a directory."""

    def __init__(self, record_dir):
        self.dir = record_dir
        ensure_dir(self.dir)
        # Maps stat name -> list of values, one per recorded measurement.
        self.stats = {}

    def record_stat(self, name, value):
        """Append *value* to the series of measurements kept under *name*."""
        self.stats.setdefault(name, []).append(value)

    def flush(self):
        """Write every stat to <record_dir>/stats.csv, one row per name.

        Rows are sorted by stat name; each row is the name followed by all
        values recorded so far.  The file is rewritten on every call.
        """
        with open(os.path.join(self.dir, "stats.csv"), "w") as f:
            writer = csv.writer(f)
            for key, values in sorted(self.stats.items()):
                writer.writerow([key] + values)

    def record_file(self, name, path):
        """Copy the file or directory tree at *path* to <record_dir>/<name>.

        Raises KeyError(name) if something is already recorded under *name*.
        """
        target_path = os.path.join(self.dir, name)
        if os.path.exists(target_path):
            raise KeyError(name)
        if os.path.isdir(path):
            shutil.copytree(path, target_path)
        else:
            shutil.copy2(path, target_path)


class TimingInstrumenter(RecordingInstrumenter):
    """Recording instrumenter that wraps each command in time(1)."""

    def parse_time_str(self, s):
        """Convert a time(1) duration string to seconds.

        1.7       -> 1.7
        1:20.3    -> 80.3
        1:02:03.5 -> 3723.5   (the %E format prefixes hours on long runs)
        """
        fields = s.split(":")
        whole = 0
        # Every field but the last is an integer count of a larger unit.
        for field in fields[:-1]:
            whole = whole * 60 + int(field)
        return whole * 60 + float(fields[-1])

    def run_bg(self, name, cmd):
        """Start *cmd* under time(1) and record its timings once it ends.

        Records '<name>-user-time', '<name>-system-time' and
        '<name>-wall-time' (all in seconds) when the returned Process is
        waited on.  Raises RuntimeError from that wait if time(1) left no
        usable output.
        """
        # Resolve the output path now, so the hook still finds the file if
        # the caller changes directory before waiting on the process.
        timing_path = os.path.abspath("timings-" + name)
        # We put a noticeable string "DATA" at the start of the format, so
        # that we can find it even if time(1) decides to print other garbage,
        # like "Command exited with non-zero status 1".
        my_cmd = ["time", "-f", "DATA: %U %S %E", "-o", timing_path] + cmd

        def timing_hook(result):
            data_line = None
            with open(timing_path, "r") as timing_file:
                for line in timing_file:
                    if line.startswith("DATA"):
                        data_line = line
                        break
            if data_line is None:
                raise RuntimeError("no DATA line found in %s" % timing_path)
            junk, user, system, wall = data_line.split()
            self.record_stat(name + "-user-time", self.parse_time_str(user))
            self.record_stat(name + "-system-time",
                             self.parse_time_str(system))
            self.record_stat(name + "-wall-time", self.parse_time_str(wall))

        process = super(TimingInstrumenter, self).run_bg(name, my_cmd)
        process.hook(timing_hook)
        return process


class Benchmark(object):
    """Interface for a benchmark; both methods are no-ops here."""

    def setup(self, testable):
        # I am called from a special directory, which my run method will
        # eventually be called in.
        # I set up the directory in some useful way.
        pass

    def run(self, testable):
        # I am called in the directory that setup() previously ran in (or a
        # copy of it).  My testable may or may not be identical.
        pass


class TestableFactory(object):
    """Creates Testable objects bound to a given instrumenter."""

    def new(self, instrumenter):
        return Testable(instrumenter)


class Testable(object):
    """Interface for a program under test; both methods are no-ops here."""

    def __init__(self, instrumenter=None):
        # TestableFactory.new() passes the instrumenter in; without this
        # constructor that call raised TypeError.
        self.instrumenter = instrumenter

    def setup(self):
        # like benchmark setup, but anything the VCS wants to do
        pass

    def pull(self, name, repo, instrumenter):
        pass


class ExistingRepo(object):
    """A repository that already exists on disk and needs no preparation."""

    def __init__(self, path):
        # Stored absolute, so it stays valid after the driver changes directory.
        self.path = os.path.abspath(path)

    def setup(self):
        pass

    def repo_path(self):
        """Return the absolute path of the repository."""
        return self.path

# each benchmark has a setup and a run
# both are done in the same directory
# and a directory to drop auxiliary results in

# copying stuff to server vs. client...

#FreshPull(db=ExistingDB("..."))
#FreshPull(db=FreshPull(ExistingDB("...")))
#FreshPull(db=IncrPull(RandomDB(tree_depth=4, tree_width=10, edit_rate=3, edit_density=0.5)))

# main driver:
#   -- needs a scratch dir
#   -- needs a results dir, which will end up with, for each
#      program-under-test, a subdir with a .csv file plus extra subdirs for
#      any auxiliary info
#   -- needs a list of programs-under-test
#   -- needs a list of benchmarks to run
#   -- needs to know how many times to run each test
#   -- whether to leave scratch dirs behind
#
# what it does:
#   arranges for each test to be setup (with instrumentation turned off), and
#   then run (multiple times, with instrumentation turned on, and each time in
#   a clean dir, generally killing that dir after each run to save space)
#   and arranges for instrumentation to go to the right place

# for each test:
#   for each program under test:
#     do the setup
#   for 1..number of runs:
#     for each program under test, in random order:
#       set up a run dir
#       clear caches
#       run the test
#       (remove the run dir, if desired)
#   get the instrumentation to combine multiple runs together, however it
#   wants to do that...

# Lua hooks handed to the test server: allow every read and write, and use
# the key id itself as the passphrase.
# NOTE(review): the whitespace inside this literal was reconstructed; Lua
# does not depend on it.
_SERVER_PERMS_LUA = """
        function get_netsync_read_permitted(pattern, identity)
            return true
        end
        function get_netsync_write_permitted(identity)
            return true
        end
        function get_passphrase(keyid)
            return keyid
        end
        """

# Key pair written into the server's key directory.
# NOTE(review): "address@hidden" looks like an e-mail address obscured before
# this text reached us; restore the real key id before use.
_KEYPAIR_FILE = "keys/address@hidden"
_KEYPAIR = """[keypair address@hidden
MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC6Pz+IvvOCDDqzN9WFO/zOjL7s9dVCS+zn
s/L9jQ2kHfNWXFof4GcgmMu4DfU4sUrRz39QxDlUrxEOvmIc9z3DNuIcbFZx7UZg9DWfdmDm
vbW79bZlVMeIudAIUwa6euX163AK5hacmqJfuB5U7awQM9o3rn8JYULleAoz5QTtawIDAQAB#
MIICyTBDBgkqhkiG9w0BBQ0wNjAeBgkqhkiG9w0BBQwwEQQIvYSV8ucj9m4CAggAAgEYMBQG
CCqGSIb3DQMHBAg/BZPM2O3QfASCAoBBGkVz4E/Pr1CsIioC92eCz4qWLclhc53HgHSCEo9I
XdNCTpCs/oxOXhQ0WQCPFhYEaxU8STgZm0Yhq8WEF1QfxOPOU8nDiwMT0L7/ARruu5bTCxnW
B3kkn+XiO5GldVJhULFlrl91t83yMsTSw+vyCyxZkqewBLR7mqHQUe2suVquMyutxxr2vZgV
QMfRxk65fSvySUHeNaj1dmakYcpP+35iejyUTAtAGuBsv2C68bwif4wkpLpedghNCtmccSdQ
t9QDF3yy6Q42tAW/OK6/t836/qn39f+47Kp4LMJUMmxNrtV7IntIkgBGgnGsqP9Br2B4GYXc
sWK0YApA3+Sf3kfH/wQ6Hib8nN4YxUTxxnS9WNHvRFrXCmfbGd5vAzi4lKCm/W+2Nlpd4DDQ
3JZjjCR73PMfKtHJCGULkNkK/9kRyhLYql2u/ZUJoEcdZxzEpYgExW8Wu1CrCVtWd+ueXs1h
or6Fdua7Gg4cjMgVg6EUSxdMBFQCX8heD8JeG6jMFNR9hTxe8o/PK8Ys63JyLMLRUv3Ud+f8
8T0TtCZV5+rgLfvb6k89uDJJK228WuJB6rp8S+qqq30RFPmkzW8JNulRilY+wrIfcowA6+TA
T5WKzFOIbkZd/R34tNLJMjTJlUq6SQKaOlQnqOEFbyY/GXgzYgnmc3tl8pigXEJvNzU5GiuB
ib35QQbzh87KlfLtWELK+8ZoyhZAZAMr97IavUbuFubOyEoEozUliARyRZ1ZudM4Ii+J6TRX
cmLryIBlz3OXgUUBSwJPwtWuR4tZ8nIt7cVJr7pxLblGfeFuu01HWN55hv4C78/aNSipVYCF
OFt8n7wQHxbbJvoTIdd/
[end]"""


class MtnFactory(object):
    """Creates Mtn testables that all wrap the executable at one path."""

    def __init__(self, path):
        self.path = path

    def new(self, instrumenter):
        """Return a Mtn for our executable, reporting to *instrumenter*."""
        return Mtn(self.path, instrumenter)


class Mtn(object):
    """Program under test: a monotone ('mtn') executable."""

    def __init__(self, path, instrumenter):
        self.path = path
        self.instrumenter = instrumenter

    def setup(self):
        """Populate the current directory with everything a run needs.

        Copies the executable under three names (so server, client and
        local invocations are distinguishable), then writes the server's
        permission hooks and key pair.
        """
        for alias in ("mtn-server", "mtn-client", "mtn"):
            shutil.copy(self.path, alias)
        with open("server-perms.lua", "w") as f:
            f.write(_SERVER_PERMS_LUA)
        os.mkdir("keys")
        with open(_KEYPAIR_FILE, "w") as f:
            f.write(_KEYPAIR)

    def pull(self, source, target, address="localhost:12345",
             startup_delay=3):
        """Serve database *source* and pull everything from it into *target*.

        address       -- host:port the server binds and the client contacts.
        startup_delay -- seconds to wait for the server before pulling.

        The server is always shut down afterwards, even if the pull raises,
        so a failed run cannot leave it holding the port.
        """
        server = self.instrumenter.run_bg("server",
                                          ["./mtn-server",
                                           "--rcfile", "server-perms.lua",
                                           "--keydir", "keys",
                                           "-d", source,
                                           "--bind=" + address,
                                           "serve", "*"])
        try:
            time.sleep(startup_delay)
            self.instrumenter.run("pull",
                                  ["./mtn-client", "-d", target,
                                   "pull", address, "*"])
        finally:
            server.kill()

    def init_repo(self, repo):
        """Create an empty database at *repo*."""
        self.instrumenter.run("init_repo",
                              ["./mtn", "db", "init", "-d", repo])


class PullBenchmark(object):
    """Benchmark: pull a whole source repository into a fresh database."""

    def __init__(self, repo_source):
        self.repo_source = repo_source

    def setup(self, vcs):
        vcs.init_repo("target")
        self.repo_source.setup()

    def run(self, vcs):
        vcs.pull(self.repo_source.repo_path(), "target")


class Driver(object):
    """Runs every benchmark against every testable and records the results."""

    def __init__(self, scratch, results, testables, benchmarks, instrumenters,
                 debug):
        """
        scratch       -- directory in which setup and run dirs are created.
        results       -- directory that receives one record dir per
                         benchmark/testable pair.
        testables     -- list of factories with a new(instrumenter) method.
        benchmarks    -- list of objects with setup(testable)/run(testable).
        instrumenters -- list of InstrumenterFactory-like objects; if empty,
                         a TimingInstrumenter with 2 repeats is used.
        debug         -- when true, scratch directories are left behind.
        """
        self.scratch = os.path.abspath(scratch)
        self.results = os.path.abspath(results)
        self.testables = testables
        self.benchmarks = benchmarks
        self.instrumenters = instrumenters
        self.debug = debug

    def run(self):
        """Set up and run every benchmark with every instrumenter factory.

        The working directory is restored on exit, also on error.

        FIXME: give things names -- I guess by turning these things into
        dicts; also, figure out some instance/class/factory naming scheme
        that makes sense.
        """
        startdir = os.getcwd()
        factories = self.instrumenters
        if not factories:
            factories = [InstrumenterFactory(TimingInstrumenter, 2)]
        try:
            for i, benchmark in enumerate(self.benchmarks, 1):
                benchdir = os.path.join(self.scratch, str(i))
                self._setup_benchmark(benchmark, benchdir)
                for k, factory in enumerate(factories):
                    # A lone factory keeps the historical directory names;
                    # several factories get a suffix so they cannot collide.
                    tag = "" if len(factories) == 1 else "-i%s" % k
                    self._run_benchmark(benchmark, benchdir, i, factory, tag,
                                        startdir)
                # Step out of the scratch tree before deleting parts of it.
                os.chdir(startdir)
                if not self.debug:
                    for j in range(len(self.testables)):
                        shutil.rmtree(
                            os.path.join(benchdir, "setup-" + str(j)))
        finally:
            os.chdir(startdir)

    def _setup_benchmark(self, benchmark, benchdir):
        # Prepare one setup dir per testable, with instrumentation off.
        for j, testable_factory in enumerate(self.testables):
            testable = testable_factory.new(Instrumenter(""))
            setupdir = os.path.join(benchdir, "setup-" + str(j))
            ensure_dir(setupdir)
            os.chdir(setupdir)
            testable.setup()
            benchmark.setup(testable)

    def _run_benchmark(self, benchmark, benchdir, bench_index, factory, tag,
                       startdir):
        # Run the benchmark factory.repeats times per testable, each time in
        # a fresh copy of that testable's setup dir, in random order.
        instrumenters = []
        for j in range(len(self.testables)):
            recorddir = os.path.join(self.results,
                                     "%s-%s%s" % (bench_index, j, tag))
            instrumenters.append(factory.make(recorddir))

        for repeat in range(factory.repeats):
            plan = list(range(len(self.testables)))
            random.shuffle(plan)
            for j in plan:
                instrumenter = instrumenters[j]
                testable = self.testables[j].new(instrumenter)
                rundir = os.path.join(benchdir,
                                      "run-%s-%s%s" % (j, repeat, tag))
                shutil.copytree(os.path.join(benchdir, "setup-" + str(j)),
                                rundir)
                os.chdir(rundir)
                clear_caches()
                benchmark.run(testable)
                instrumenter.flush()
                if not self.debug:
                    os.chdir(startdir)
                    shutil.rmtree(rundir)


def tryit(mtn_path="/home/njs/src/monotone/opt/mtn",
          repo_path="/home/njs/src/monotone/speedtest/t.db",
          scratch="scratch", results="results", debug=1):
    """Run the pull benchmark once against one mtn binary.

    Any existing *scratch* and *results* directories are removed first.
    The defaults reproduce the original hard-coded developer setup.
    """
    shutil.rmtree(scratch, True)
    shutil.rmtree(results, True)

    testables = [MtnFactory(mtn_path)]
    benchmarks = [PullBenchmark(ExistingRepo(repo_path))]
    instrumenters = [InstrumenterFactory(TimingInstrumenter, 2)]

    driver = Driver(scratch, results, testables, benchmarks, instrumenters,
                    debug)
    driver.run()


if __name__ == "__main__":
    tryit()