Size: 3538
Comment: +links
|
Size: 3542
Comment: converted to 1.6 markup
|
Deletions are marked like this. | Additions are marked like this. |
Line 3: | Line 3: |
When [:Repository:repositories] are [:Clone:cloned] locally, their data files will be hardlinked so that they only use the space of a single repository. | When [[Repository|repositories]] are [[Clone|cloned]] locally, their data files will be hardlinked so that they only use the space of a single repository. |
Line 5: | Line 5: |
Unfortunately, subsequent [:Pull:pulls] into either repository will break hardlinks for any files touched by the new [:ChangeSet:changesets], even if both repositories end up pulling the same changes. | Unfortunately, subsequent [[Pull|pulls]] into either repository will break hardlinks for any files touched by the new [[ChangeSet|changesets]], even if both repositories end up pulling the same changes. |
Recreate hardlinks between two Mercurial repositories
When repositories are cloned locally, their data files will be hardlinked so that they only use the space of a single repository.
Unfortunately, subsequent pulls into either repository will break hardlinks for any files touched by the new changesets, even if both repositories end up pulling the same changes.
Here's a quick and dirty way to recreate those hardlinks and reclaim that wasted space (this script is also available as contrib/hg-relink in the source tarball):
# relink: recreate hardlinks between two Mercurial repositories.
#
# Walks the .hg directory of the source repository, finds revlog files
# (*.i / *.d) that also exist with identical content in the destination
# repository, and replaces the destination copy with a hard link to the
# source file.  Runs on Python 2.6+ and Python 3.
import os
import sys

# Number of bytes read per step when comparing two files.
CHUNKLEN = 65536


class ConfigError(Exception):
    """Raised when the command-line arguments cannot be used."""


def usage():
    """Print the command-line synopsis to standard output."""
    print("relink <source> <destination>\n"
          "    Hard-link files from source to destination")


class Config(object):
    """Validated command-line configuration.

    Attributes:
        src: absolute path of the source repository root.
        dst: absolute path of the destination repository root.
    """

    def __init__(self, args):
        """Parse ``args`` (an argv-style list: program name, source, dest).

        Raises:
            ConfigError: if the argument count is not exactly three or if
                either directory does not contain a ``.hg`` entry.
        """
        if len(args) != 3:
            raise ConfigError("wrong number of arguments")
        self.src = os.path.abspath(args[1])
        self.dst = os.path.abspath(args[2])
        for d in (self.src, self.dst):
            if not os.path.exists(os.path.join(d, '.hg')):
                raise ConfigError("%s: not a mercurial repository" % d)


def collect(src):
    """Return ``[(relative_path, stat_result), ...]`` for revlog files.

    Every file below ``src`` whose name ends in ``.i`` or ``.d`` is listed;
    the path is relative to ``src``.
    """
    candidates = []
    for dirpath, dirnames, filenames in os.walk(src):
        for filename in filenames:
            if not filename.endswith(('.i', '.d')):
                continue
            path = os.path.join(dirpath, filename)
            # relpath (rather than slicing off len(src)) stays correct even
            # when src was given with a trailing separator.
            candidates.append((os.path.relpath(path, src), os.stat(path)))
    return candidates


def prune(candidates, dst):
    """Filter ``candidates`` down to files worth comparing byte-for-byte.

    A candidate is kept when ``dst`` has a file at the same relative path
    that is not already the same inode and has the same size.

    Returns:
        ``[(relative_path, size), ...]``

    Raises:
        Exception: if a source file and its destination counterpart live on
            different devices (hard links cannot span devices).
    """
    targets = []
    for fn, st in candidates:
        tgt = os.path.join(dst, fn)
        try:
            ts = os.stat(tgt)
        except OSError:
            # Destination doesn't have this file?
            continue
        # Check the device first: inode numbers are only comparable within
        # one device, so an equal st_ino across devices means nothing.
        if st.st_dev != ts.st_dev:
            raise Exception('Source and destination are on different devices')
        if st.st_ino == ts.st_ino:
            # Already hard-linked.
            continue
        if st.st_size != ts.st_size:
            continue
        targets.append((fn, ts.st_size))
    return targets


def _same_content(path_a, path_b):
    """Return True if both files hold exactly the same bytes."""
    with open(path_a, 'rb') as afp:
        with open(path_b, 'rb') as bfp:
            while True:
                a = afp.read(CHUNKLEN)
                b = bfp.read(CHUNKLEN)
                if a != b:
                    return False
                if not a:
                    # Both files reached EOF together.
                    return True


def relink(src, dst, files):
    """Replace identical files under ``dst`` with hard links into ``src``.

    Args:
        src: directory containing the files to link from.
        dst: directory whose duplicate files are replaced.
        files: iterable of ``(relative_path, size)`` pairs, as produced by
            ``prune``.

    Each file is compared byte-for-byte first; differing files are left
    untouched.  The destination file is moved to ``<name>.bak`` while the
    link is created and restored if linking fails.  Per-file I/O errors are
    printed and do not stop the run.

    Returns:
        ``(relinked, savedbytes)``: number of files relinked and the sum of
        their sizes.
    """
    relinked = 0
    savedbytes = 0
    for f, sz in files:
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        backup = tgt + '.bak'
        try:
            # Both files are closed again before the target is renamed.
            if not _same_content(source, tgt):
                continue
            os.rename(tgt, backup)
            try:
                os.link(source, tgt)
            except OSError:
                # Put the original file back before reporting the failure.
                os.rename(backup, tgt)
                raise
            print('Relinked %s' % f)
            relinked += 1
            savedbytes += sz
            os.remove(backup)
        except EnvironmentError as inst:
            # EnvironmentError covers IOError and OSError on Python 2 and
            # is an alias of OSError on Python 3.
            print('%s: %s' % (tgt, str(inst)))
    print('Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes))
    return relinked, savedbytes


def main(argv=None):
    """Command-line entry point; ``argv`` defaults to ``sys.argv``.

    Prints the error and usage text and exits with status 1 when the
    arguments are invalid.
    """
    if argv is None:
        argv = sys.argv
    try:
        cfg = Config(argv)
    except ConfigError as inst:
        print(str(inst))
        usage()
        sys.exit(1)

    src = os.path.join(cfg.src, '.hg')
    dst = os.path.join(cfg.dst, '.hg')

    candidates = collect(src)
    targets = prune(candidates, dst)
    relink(src, dst, targets)


if __name__ == '__main__':
    main()