rdiff-backup-users
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[rdiff-backup-users] [PATCH] Use --include and --exclude with --remove-older-than


From: Josh Nisly
Subject: [rdiff-backup-users] [PATCH] Use --include and --exclude with --remove-older-than
Date: Sun, 26 Oct 2008 21:42:55 -0500
User-agent: Thunderbird 2.0.0.17 (X11/20080925)

Attached is a work-in-progress patch to support the --include and --exclude selection options in concert with --remove-older-than. This provides the functionality described in http://wiki.rdiff-backup.org/wiki/index.php/RemoveOlderThanAllowsSubdirectories and http://wiki.rdiff-backup.org/wiki/index.php/RemoveSpecifiedFiles. I'm mainly posting this for feedback on the implementation, particularly the changes in selection.py and metadata.py.

AFAICT, the main pieces left are munging the file_statistics files and loosening validation. Regarding the validation: currently, --remove-older-than checks that it will not remove more than one increment. I think this check should be loosened when --remove-older-than is used with selection options, but other than simply removing the check, I don't have a lot of ideas. Thoughts?

Thanks,
JoshN
--- rdiff_backup/Main.py        12 Oct 2008 02:21:29 -0000      1.121
+++ rdiff_backup/Main.py        27 Oct 2008 02:06:49 -0000
@@ -716,10 +716,17 @@
        rootrp = require_root_set(rootrp, 0)
        rot_require_rbdir_base(rootrp)
 
+       # Validate that the selection options are valid
+       for select_opt in select_opts:
+               if select_opt[0] != '--include' and \
+                               select_opt[0] != '--exclude':
+                       Log.FatalError("Only --include and --exclude are "
+                               "supported with --remove-older-than.")
+
        time = rot_check_time(remove_older_than_string)
        if time is None: return
        Log("Actual remove older than time: %s" % (time,), 6)
-       manage.delete_earlier_than(Globals.rbdir, time)
+       manage.delete_earlier_than(Globals.rbdir, time, select_opts)
 
 def rot_check_time(time_string):
        """Check remove older than time_string, return time in seconds"""
--- rdiff_backup/manage.py      7 Jul 2007 22:43:34 -0000       1.13
+++ rdiff_backup/manage.py      27 Oct 2008 02:06:49 -0000
@@ -22,6 +22,7 @@
 from __future__ import generators
 from log import Log
 import Globals, Time, static, statistics, restore, selection, FilenameMapping
+import metadata
 
 
 class ManageException(Exception): pass
@@ -80,7 +81,7 @@
        result.append("Current mirror: %s" % Time.timetopretty(mirror_time))
        return "\n".join(result)
 
-def delete_earlier_than(baserp, time):
+def delete_earlier_than(baserp, time, select_opts):
        """Deleting increments older than time in directory baserp
 
        time is in seconds.  It will then delete any empty directories
@@ -88,9 +89,9 @@
        rdiff-backup-data directory should be the root of the tree.
 
        """
-       baserp.conn.manage.delete_earlier_than_local(baserp, time)
+       baserp.conn.manage.delete_earlier_than_local(baserp, time, select_opts)
 
-def delete_earlier_than_local(baserp, time):
+def delete_earlier_than_local(baserp, time, select_opts):
        """Like delete_earlier_than, but run on local connection for speed"""
        assert baserp.conn is Globals.local_connection
        def yield_files(rp):
@@ -100,13 +101,37 @@
                                        yield sub_rp
                yield rp
 
-       for rp in yield_files(baserp):
-               if ((rp.isincfile() and rp.getinctime() < time) or
-                       (rp.isdir() and not rp.listdir())):
-                       Log("Deleting increment file %s" % rp.path, 5)
-                       rp.delete()
-
+       if not select_opts:
+               # Simple remove. Delete all files with timestamp older
+               # than time.
+               for rp in yield_files(baserp):
+                       if ((rp.isincfile() and rp.getinctime() < time) or
+                               (rp.isdir() and not rp.listdir())):
 
+                               Log("Deleting increment file %s" % rp.path, 5)
+                               rp.delete()
+       else:
+               # Remove with selection options. Remove all increments
+               # that match, then modify metadata files to match.
+               select = selection.Select(baserp.append_path('increments'), 
True)
+               select.ParseArgs(select_opts, [])
+
+               for rp in yield_files(baserp.append_path('increments')):
+                       if ((rp.isincfile() and rp.getinctime() < time) or
+                               (rp.isdir() and not rp.listdir())):
+
+                               if select.Select(rp) == 1: # File matched
+                                       Log("Deleting increment file %s" % 
rp.path, 5)
+                                       rp.delete()
+
+               # Process metadata
+               select = selection.Select(baserp)
+               select.ParseArgs(select_opts, [])
+               metadata.SetManager()
+               def callback(rorp):
+                       return select.Select(rorp) == 1
+               metadata.rewrite_meta_files(time, callback)
+               
 class IncObj:
        """Increment object - represent a completed increment"""
        def __init__(self, incrp):
--- rdiff_backup/metadata.py    27 Sep 2008 00:17:24 -0000      1.32
+++ rdiff_backup/metadata.py    27 Oct 2008 02:06:49 -0000
@@ -429,6 +429,61 @@
        _extractor = RorpExtractor
        _object_to_record = staticmethod(RORP2Record)
 
+def rewrite_meta_files(beforetime, callback):
+       """ Rewrites the various metadata files, removing historical
+       increments for rorp's where callback returns False. 
+
+       This function is more complicated because of the metadata
+       snapshot files. rewrite_meta_files goes from the latest
+       metadata information, and works earlier. Whenever it encounters
+       an entry in a snapshot file, it finds what that information
+       should be, based on later metadata files, and puts that in
+       the new file."""
+
+       meta_base = Globals.rbdir.append_path('mirror_metadata')
+       metatimes = restore.get_inclist(meta_base)
+       metatimes = [file.getinctime() for file in metatimes]
+       metatimes.sort()
+       metatimes.reverse()
+
+       single_manager = Manager()
+
+       prevtime = None
+       for time in metatimes:
+               if time < beforetime:
+                       inc_type = ManagerObj.get_meta_inctype(time)
+                       reader = single_manager.GetAtTime(time, None)
+                       writer = ManagerObj.GetWriter(inc_type, time)
+                       if inc_type != 'snapshot':
+                               # We're rewriting a diff file. Since the 
entries in these
+                               # files work like the increments (they only 
exist if
+                               # there's a change), we can just write the 
entries that
+                               # don't match the callback.
+                               for rorp in reader:
+                                       if not callback(rorp):
+                                               writer.write_object(rorp)
+                       else:
+                               # We're rewriting a snapshot file. Iterate 
through both
+                               # this snapshot and the metadata as it existed 
at the
+                               # previous backup. For each rorp, if it matches 
the
+                               # callback, use the previous version 
(effectively
+                               # removing this backup's increment), otherwise 
use this
+                               # backup's version.
+                               assert not prevtime is None
+                               prev_reader = ManagerObj.GetAtTime(prevtime, 
None)
+                               iter = rorpiter.Collate2Iters(reader, 
prev_reader)
+                               for this_rorp, prev_rorp in iter:
+                                       rorp = this_rorp or prev_rorp
+                                       if callback(rorp):
+                                               # Use previous version
+                                               if prev_rorp:
+                                                       
writer.write_object(prev_rorp)
+                                       else:
+                                               if this_rorp:
+                                                       
writer.write_object(this_rorp)
+
+                       writer.close()
+               prevtime = time
 
 class CombinedWriter:
        """Used for simultaneously writting metadata, eas, and acls"""
@@ -482,6 +537,12 @@
                if self.prefixmap.has_key(incbase): 
self.prefixmap[incbase].append(rp)
                else: self.prefixmap[incbase] = [rp]
 
+       def get_meta_inctype(self, time):
+               metas = filter(lambda x: x.getinctime() == time,
+                                       self.prefixmap['mirror_metadata'])
+               assert len(metas) == 1, metas
+               return metas[0].getinctype()
+
        def _iter_helper(self, prefix, flatfileclass, time, restrict_index):
                """Used below to find the right kind of file by time"""
                if not self.timerpmap.has_key(time): return None
@@ -690,3 +751,4 @@
 
 
 import eas_acls, win_acls # put at bottom to avoid python circularity bug
+import restore
--- rdiff_backup/selection.py   4 Sep 2008 23:36:20 -0000       1.47
+++ rdiff_backup/selection.py   27 Oct 2008 02:06:49 -0000
@@ -79,12 +79,13 @@
        # This re should not match normal filenames, but usually just globs
        glob_re = re.compile("(.*[*?[\\\\]|ignorecase\\:)", re.I | re.S)
 
-       def __init__(self, rootrp):
+       def __init__(self, rootrp, use_incr_name=False):
                """Select initializer.  rpath is the root directory"""
                assert isinstance(rootrp, rpath.RPath)
                self.selection_functions = []
                self.rpath = rootrp
                self.prefix = self.rpath.path
+               self.use_incr_name = use_incr_name
 
        def set_iter(self, sel_func = None):
                """Initialize more variables, get ready to iterate
@@ -537,6 +538,15 @@
                sel_func.name = "%s size %d" % (min_max and "Maximum" or 
"Minimum", size)
                return sel_func
 
+       def get_filename(self, rp):
+               if self.use_incr_name and rp.isincfile():
+                       return rp.getincbase().path
+               else:
+                       if hasattr(rp, 'path'):
+                               return rp.path
+                       else:
+                               return self.prefix + '/'.join(rp.index)
+
        def glob_get_sf(self, glob_str, include):
                """Return selection function given by glob string"""
                assert include == 0 or include == 1
@@ -614,12 +624,12 @@
                                                           
"|".join(self.glob_get_prefix_res(glob_str)))
 
                def include_sel_func(rp):
-                       if glob_comp_re.match(rp.path): return 1
-                       elif scan_comp_re.match(rp.path): return 2
+                       if glob_comp_re.match(self.get_filename(rp)): return 1
+                       elif scan_comp_re.match(self.get_filename(rp)): return 2
                        else: return None
 
                def exclude_sel_func(rp):
-                       if glob_comp_re.match(rp.path): return 0
+                       if glob_comp_re.match(self.get_filename(rp)): return 0
                        else: return None
 
                # Check to make sure prefix is ok

reply via email to

[Prev in Thread] Current Thread [Next in Thread]