Differences between revisions 4 and 5
Revision 4 as of 2008-03-24 13:11:48
Size: 3538
Editor: abuehl
Comment: +links
Revision 5 as of 2009-05-19 19:30:55
Size: 3542
Editor: localhost
Comment: converted to 1.6 markup
Deletions are marked like this. Additions are marked like this.
Line 3: Line 3:
When [:Repository:repositories] are [:Clone:cloned] locally, their data files will be hardlinked so that they only use the space of a single repository. When [[Repository|repositories]] are [[Clone|cloned]] locally, their data files will be hardlinked so that they only use the space of a single repository.
Line 5: Line 5:
Unfortunately, subsequent [:Pull:pulls] into either repository will break hardlinks for any files touched by the new [:ChangeSet:changesets], even if both repositories end up pulling the same changes. Unfortunately, subsequent [[Pull|pulls]] into either repository will break hardlinks for any files touched by the new [[ChangeSet|changesets]], even if both repositories end up pulling the same changes.

When repositories are cloned locally, their data files will be hardlinked so that they only use the space of a single repository.

Unfortunately, subsequent pulls into either repository will break hardlinks for any files touched by the new changesets, even if both repositories end up pulling the same changes.

Here's a quick and dirty way to recreate those hardlinks and reclaim that wasted space (this script is also available as contrib/hg-relink in the source tarball):

import os, sys

class ConfigError(Exception):
    """Signal that the command-line arguments cannot be used.

    Raised while building the configuration, either because the wrong number
    of arguments was given or because a path is not a repository.
    """

def usage():
    """Print a two-line synopsis of the command-line arguments to stdout."""
    # The parenthesised single-argument form behaves the same as the
    # Python 2 print statement and as the Python 3 print function, so the
    # script parses under both.
    print("""relink <source> <destination>
    Hard-link files from source to destination""")

class Config:
    """Resolved source and destination repository roots.

    ``args`` is an argv-style sequence of exactly three items: the program
    name, the source directory and the destination directory.  The two
    directories are stored as absolute paths in ``src`` and ``dst``.

    Raises ConfigError if the number of arguments is wrong, or if either
    directory does not contain a ``.hg`` entry.
    """

    def __init__(self, args):
        if len(args) != 3:
            raise ConfigError("wrong number of arguments")

        self.src = os.path.abspath(args[1])
        self.dst = os.path.abspath(args[2])

        # Both paths must look like repositories, i.e. contain ".hg".
        for repo_root in (self.src, self.dst):
            marker = os.path.join(repo_root, '.hg')
            if os.path.exists(marker):
                continue
            raise ConfigError("%s: not a mercurial repository" % repo_root)

# Parse the command line up front.  On bad arguments, report the problem,
# show the usage text and exit with status 1.
try:
    cfg = Config(sys.argv)
except ConfigError as inst:
    # "except ... as" (Python 2.6+) replaces the comma form, which is a
    # syntax error on Python 3; print(...) with one argument works on both.
    print(str(inst))
    usage()
    sys.exit(1)

# Module-level counters and chunk size.  relink() assigns its own local
# variables with these same names, so it does not read the values set here.
CHUNKLEN = 4096
relinked, savedbytes = 0, 0

def collect(src):
    """List the ``.i`` and ``.d`` files found anywhere beneath ``src``.

    Returns a list of ``(relative_path, stat_result)`` tuples, where
    ``relative_path`` is relative to ``src`` and ``stat_result`` comes from
    ``os.stat`` on the file.  Files with any other suffix are ignored.
    """
    # Number of leading characters ("<src><separator>") to drop from each
    # walked directory so that the remainder is relative to src.
    prefixlen = len(src) + len(os.path.sep)
    found = []
    for root, _subdirs, names in os.walk(src):
        # For the top directory itself the slice yields an empty string,
        # which os.path.join() treats as "no directory component".
        subdir = root[prefixlen:]
        for name in names:
            if name.endswith(('.i', '.d')):
                info = os.stat(os.path.join(root, name))
                found.append((os.path.join(subdir, name), info))

    return found

def prune(candidates, dst):
    """Narrow ``candidates`` down to files that might be worth relinking.

    ``candidates`` is an iterable of ``(relative_path, stat_result)`` pairs
    describing source files; ``dst`` is the directory in which files of the
    same relative path are looked up.

    A candidate is dropped when the destination file is missing, when it is
    already the same file as the source (same device and inode), or when the
    two sizes differ.

    Returns a list of ``(relative_path, size)`` tuples for the remaining
    files.

    Raises Exception if a source file and its destination counterpart are on
    different devices, since hard links cannot span devices.
    """
    targets = []
    for fn, st in candidates:
        tgt = os.path.join(dst, fn)
        try:
            ts = os.stat(tgt)
        except OSError:
            # Destination doesn't have this file?
            continue
        # Check the device before the inode: inode numbers are only unique
        # within one device, so an inode match across devices must not be
        # mistaken for "already hard-linked".
        if st.st_dev != ts.st_dev:
            raise Exception('Source and destination are on different devices')
        if st.st_ino == ts.st_ino:
            # Same device and inode: already one and the same file.
            continue
        if st.st_size != ts.st_size:
            # Different sizes can never have identical contents.
            continue
        targets.append((fn, ts.st_size))

    return targets

def _samecontents(path_a, path_b, chunklen):
    """Return True if the two files contain exactly the same bytes."""
    # Binary mode avoids newline translation; the with-blocks make sure both
    # handles are closed whether or not the files match.
    with open(path_a, 'rb') as fa:
        with open(path_b, 'rb') as fb:
            while True:
                chunk_a = fa.read(chunklen)
                chunk_b = fb.read(chunklen)
                if chunk_a != chunk_b:
                    return False
                if not chunk_a:
                    # Both files reached end-of-file together.
                    return True


def relink(src, dst, files):
    """Replace files under ``dst`` with hard links to identical ``src`` files.

    ``src`` and ``dst`` are directories; ``files`` is an iterable of
    ``(relative_path, size)`` pairs.  For each pair the two files are
    compared byte-for-byte and, when identical, the destination file is
    replaced by a hard link to the source file.

    Prints one line for every relinked file, one line for every file that
    failed with an I/O or OS error (processing then continues with the next
    file), and a final summary of the file count and the sum of the given
    sizes.  Returns None.
    """
    CHUNKLEN = 65536
    relinked = 0
    savedbytes = 0

    for f, sz in files:
        source = os.path.join(src, f)
        tgt = os.path.join(dst, f)
        backup = tgt + '.bak'
        try:
            if not _samecontents(source, tgt, CHUNKLEN):
                continue
            # Move the destination aside first so that it can be put back
            # if creating the hard link fails.
            os.rename(tgt, backup)
            try:
                os.link(source, tgt)
            except OSError:
                os.rename(backup, tgt)
                raise
            print('Relinked %s' % f)
            relinked += 1
            savedbytes += sz
            os.remove(backup)
        except (IOError, OSError) as inst:
            # IOError covers open()/read() failures on Python 2; on
            # Python 3 it is an alias of OSError.
            print('%s: %s' % (tgt, str(inst)))

    print('Relinked %d files (%d bytes reclaimed)' % (relinked, savedbytes))

# Work inside the ".hg" directory of each repository: gather the source
# files, keep those with a same-sized, not-yet-linked counterpart in the
# destination, then hard-link the ones whose contents match.
src, dst = (os.path.join(root, '.hg') for root in (cfg.src, cfg.dst))
candidates = collect(src)
targets = prune(candidates, dst)
relink(src, dst, targets)


CategoryTipsAndTricks

RelinkExtension (last edited 2020-05-30 04:17:27 by aayjaychan)