#!/usr/bin/env python3
"""Time shell commands in isolated child processes and collect their rusage."""

import csv
import multiprocessing
import pathlib
from pprint import pprint
import resource
import subprocess
import time

# Attributes copied from the `resource.getrusage()` result into each record.
RUSAGE_FIELDS = [
    'ru_utime',
    'ru_stime',
    'ru_maxrss',
    'ru_ixrss',
    'ru_idrss',
    'ru_isrss',
    'ru_minflt',
    'ru_majflt',
    'ru_nswap',
    'ru_inblock',
    'ru_oublock',
    'ru_msgsnd',
    'ru_msgrcv',
    'ru_nsignals',
    'ru_nvcsw',
    'ru_nivcsw',
]

# Path to a modified `find` executable that can run `find -s`.
local_find = pathlib.Path.home().joinpath('findutils', 'find', 'find')


# Roughly equivalent to '/usr/bin/time {command}'. It has to run in a child
# process so that getrusage values don't accumulate as multiple commands are
# run.
def timed_run(command, env):
    """Run `command` through the shell and return its timing/rusage record.

    Args:
        command: Shell command line (executed with ``shell=True``).
        env: Complete environment mapping handed to the command.

    Returns:
        A dict with ``command``, ``elapsed`` (seconds), ``start_time``
        (epoch seconds), every name in ``RUSAGE_FIELDS`` and, when the
        command's stdout parses as an integer, ``files``.

    Raises:
        subprocess.CalledProcessError: The command exited non-zero.
        subprocess.TimeoutExpired: The command ran longer than 60 seconds.
    """
    start = time.time()
    # Wall-clock time can jump (NTP, manual changes); measure the duration
    # with the monotonic clock and keep `start` only as a timestamp.
    tick = time.monotonic()
    proc = subprocess.run(
        command,
        env=env,
        timeout=60,
        capture_output=True,
        check=True,
        shell=True,
    )
    elapsed = time.monotonic() - tick
    rusage = resource.getrusage(resource.RUSAGE_CHILDREN)

    if proc.stderr:
        print(proc.stderr)

    result = {
        'command': command,
        'elapsed': elapsed,
        'start_time': start,
    }
    try:
        result['files'] = int(proc.stdout)
    except ValueError:
        # Output was not a bare integer; leave 'files' out of the record.
        pass
    for field in RUSAGE_FIELDS:
        result[field] = getattr(rusage, field)
    return result


# Process target for `run_one`. It is a module-level function (not a lambda)
# so it can be pickled when the start method is 'spawn' or 'forkserver'.
def _timed_run_worker(queue, command, env):
    queue.put(timed_run(command, env))


# Runs `timed_run` in a child process.
def run_one(command, *, env=None, **tags):
    """Execute `timed_run(command, env)` in a fresh process and tag the result.

    Args:
        command: Shell command line to time.
        env: Environment mapping for the command; ``None`` means an empty
            environment. Its entries are also copied into the result.
        **tags: Extra key/value pairs copied into the result.

    Returns:
        A dict built from `tags`, then `env`, then the `timed_run` record
        (later sources win on key collisions).

    Raises:
        RuntimeError: The child process exited without producing a result,
            e.g. because the command failed or timed out.
    """
    env = {} if env is None else env
    queue = multiprocessing.SimpleQueue()
    worker = multiprocessing.Process(
        target=_timed_run_worker, args=(queue, command, env))
    worker.start()
    # The record is far smaller than a pipe buffer, so the child's put()
    # completes without a reader and joining before get() cannot deadlock.
    worker.join()
    if worker.exitcode != 0:
        # Nothing was queued; a blind queue.get() would block forever.
        raise RuntimeError(
            f'timed run of {command!r} failed '
            f'(child exit code {worker.exitcode})')

    result = dict(tags)
    result.update(env)
    result.update(queue.get())
    return result


# Records accumulated across every call to `run`.
results = []


# Executes a command a few times and records/reports its results.
def run(command, *, caches, env=None, **tags):
    """Time `command` three times, then rewrite ``results.csv``.

    Args:
        command: Shell command line passed to `run_one`.
        caches: ``'cold'`` syncs and drops the kernel caches (via sudo)
            before each run; any other value leaves them alone. The value
            is also recorded in each result.
        env: Environment mapping for the command; ``None`` means an empty
            environment.
        **tags: Extra key/value pairs recorded in each result.

    Side effects:
        Appends each result to the module-level `results` list, pretty-prints
        it, and rewrites ``results.csv`` with every result gathered so far.
    """
    env = {} if env is None else env
    for _ in range(3):
        if caches == 'cold':
            # Flush dirty pages first so drop_caches can evict everything.
            subprocess.run('sync', check=True)
            subprocess.run(
                'echo 3 | sudo tee /proc/sys/vm/drop_caches',
                shell=True,
                check=True,
            )
        result = run_one(command, env=env, caches=caches, **tags)
        pprint(result)
        results.append(result)

    # Union of all keys seen so far, in first-seen order; records missing a
    # key (e.g. 'files') are written with an empty cell.
    header = {}
    for r in results:
        header.update(r)

    # newline='' is required by the csv module so it controls line endings.
    with open('results.csv', 'w', newline='') as f:
        w = csv.DictWriter(f, fieldnames=list(header))
        w.writeheader()
        w.writerows(results)


if __name__ == '__main__':
    # Tree label -> arguments handed to `find`.
    trees = {
        'mnt': '/mnt/testdir',
        'full': '/ -xdev',
        'root': '/ -xdev -path /home -prune -o -print',
        'home': '/home',
    }
    for caches in ['warm', 'cold']:
        for tree, find_args in trees.items():
            for locale in ['C', 'en_US.UTF-8']:
                kwargs = {
                    'env': {'LC_ALL': locale},
                    'caches': caches,
                    'tree': tree,
                }
                if caches == 'warm':
                    # Untimed pass to populate the caches before measuring.
                    run_one(f'sudo /usr/bin/find {find_args} | wc -l')
                run(f'sudo /usr/bin/find {find_args} | wc -l',
                    mode='system', **kwargs)
                run(f'sudo {local_find} {find_args} | wc -l',
                    mode='unsorted', **kwargs)
                run(f'sudo {local_find} -s {find_args} | wc -l',
                    mode='sortflag', **kwargs)
                run(f'sudo {local_find} {find_args} | sort | wc -l',
                    mode='sortcmd', **kwargs)