changeset 1677:11d12bd6e1dc

cleanup of revlog.group when repository is local. revlog.group cached every chunk from the revlog; this behaviour was needed to minimize the round trips with old-http. The patch exports the information about whether the repository is local or not from the repository object down to the revlog. It then uses the old-http workaround only if the repository is non-local. The memory used server-side when pulling goes down to less than 30 MB maximum, whereas without the patch more than 160 MB was used when cloning the Linux kernel repository. The time taken by cloning is roughly the same (although some caching could be implemented if needed). Before: 110.25user 20.90system 2:52.00elapsed 76%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+708707minor)pagefaults 0swaps. After: 112.85user 22.98system 2:50.66elapsed 79%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+862862minor)pagefaults 0swaps
author Benoit Boissinot <benoit.boissinot@ens-lyon.org>
date Thu, 08 Dec 2005 15:12:02 +0100
parents 63799b01985c
children b345cc4c22c0
files mercurial/changelog.py mercurial/filelog.py mercurial/localrepo.py mercurial/manifest.py mercurial/revlog.py mercurial/statichttprepo.py
diffstat 6 files changed, 77 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/changelog.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/changelog.py	Thu Dec 08 15:12:02 2005 +0100
@@ -11,8 +11,9 @@
 demandload(globals(), "os time util")
 
 class changelog(revlog):
-    def __init__(self, opener):
-        revlog.__init__(self, opener, "00changelog.i", "00changelog.d")
+    def __init__(self, opener, local=True):
+        revlog.__init__(self, opener, "00changelog.i", "00changelog.d",
+                        local=local)
 
     def extract(self, text):
         if not text:
--- a/mercurial/filelog.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/filelog.py	Thu Dec 08 15:12:02 2005 +0100
@@ -11,10 +11,11 @@
 demandload(globals(), "bdiff")
 
 class filelog(revlog):
-    def __init__(self, opener, path):
+    def __init__(self, opener, path, local=True):
         revlog.__init__(self, opener,
                         os.path.join("data", self.encodedir(path + ".i")),
-                        os.path.join("data", self.encodedir(path + ".d")))
+                        os.path.join("data", self.encodedir(path + ".d")),
+                        local=local)
 
     # This avoids a collision between a file named foo and a dir named
     # foo.i or foo.d
--- a/mercurial/localrepo.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/localrepo.py	Thu Dec 08 15:12:02 2005 +0100
@@ -30,8 +30,8 @@
         self.ui = ui
         self.opener = util.opener(self.path)
         self.wopener = util.opener(self.root)
-        self.manifest = manifest.manifest(self.opener)
-        self.changelog = changelog.changelog(self.opener)
+        self.manifest = manifest.manifest(self.opener, local=self.local())
+        self.changelog = changelog.changelog(self.opener, local=self.local())
         self.tagscache = None
         self.nodetagscache = None
         self.encodepats = None
@@ -161,7 +161,7 @@
 
     def file(self, f):
         if f[0] == '/': f = f[1:]
-        return filelog.filelog(self.opener, f)
+        return filelog.filelog(self.opener, f, local=self.local())
 
     def getcwd(self):
         return self.dirstate.getcwd()
--- a/mercurial/manifest.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/manifest.py	Thu Dec 08 15:12:02 2005 +0100
@@ -12,10 +12,11 @@
 demandload(globals(), "bisect array")
 
 class manifest(revlog):
-    def __init__(self, opener):
+    def __init__(self, opener, local=True):
         self.mapcache = None
         self.listcache = None
-        revlog.__init__(self, opener, "00manifest.i", "00manifest.d")
+        revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
+                        local=local)
 
     def read(self, node):
         if node == nullid: return {} # don't upset local cache
--- a/mercurial/revlog.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/revlog.py	Thu Dec 08 15:12:02 2005 +0100
@@ -177,7 +177,7 @@
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
-    def __init__(self, opener, indexfile, datafile):
+    def __init__(self, opener, indexfile, datafile, local=True):
         """
         create a revlog object
 
@@ -188,6 +188,7 @@
         self.datafile = datafile
         self.opener = opener
         self.cache = None
+        self.local = local # XXX only needed because statichttp
 
         try:
             i = self.opener(self.indexfile).read()
@@ -650,7 +651,7 @@
                 #print "next x"
                 gx = x.next()
 
-    def group(self, nodelist, lookup, infocollect = None):
+    def group(self, nodelist, lookup, infocollect=None):
         """calculate a delta group
 
         Given a list of changeset revs, return a set of deltas and
@@ -660,7 +661,6 @@
         changesets. parent is parent[0]
         """
         revs = [self.rev(n) for n in nodelist]
-        needed = dict.fromkeys(revs, 1)
 
         # if we don't have any revisions touched by these changesets, bail
         if not revs:
@@ -671,59 +671,70 @@
         p = self.parents(self.node(revs[0]))[0]
         revs.insert(0, self.rev(p))
 
-        # for each delta that isn't contiguous in the log, we need to
-        # reconstruct the base, reconstruct the result, and then
-        # calculate the delta. We also need to do this where we've
-        # stored a full version and not a delta
-        for i in xrange(0, len(revs) - 1):
-            a, b = revs[i], revs[i + 1]
-            if a + 1 != b or self.base(b) == b:
-                for j in xrange(self.base(a), a + 1):
-                    needed[j] = 1
-                for j in xrange(self.base(b), b + 1):
-                    needed[j] = 1
+        if self.local:
+            mm = self.opener(self.datafile)
+            def chunk(r):
+                o = self.start(r)
+                l = self.length(r)
+                mm.seek(o)
+                return decompress(mm.read(l))
+        else:
+            # XXX: statichttp workaround
+            needed = dict.fromkeys(revs[1:], 1)
+            # for each delta that isn't contiguous in the log, we need to
+            # reconstruct the base, reconstruct the result, and then
+            # calculate the delta. We also need to do this where we've
+            # stored a full version and not a delta
+            for i in xrange(0, len(revs) - 1):
+                a, b = revs[i], revs[i + 1]
+                if a + 1 != b or self.base(b) == b:
+                    for j in xrange(self.base(a), a + 1):
+                        needed[j] = 1
+                    for j in xrange(self.base(b), b + 1):
+                        needed[j] = 1
 
-        # calculate spans to retrieve from datafile
-        needed = needed.keys()
-        needed.sort()
-        spans = []
-        oo = -1
-        ol = 0
-        for n in needed:
-            if n < 0: continue
-            o = self.start(n)
-            l = self.length(n)
-            if oo + ol == o: # can we merge with the previous?
-                nl = spans[-1][2]
-                nl.append((n, l))
-                ol += l
-                spans[-1] = (oo, ol, nl)
-            else:
-                oo = o
-                ol = l
-                spans.append((oo, ol, [(n, l)]))
+            # calculate spans to retrieve from datafile
+            needed = needed.keys()
+            needed.sort()
+            spans = []
+            oo = -1
+            ol = 0
+            for n in needed:
+                if n < 0: continue
+                o = self.start(n)
+                l = self.length(n)
+                if oo + ol == o: # can we merge with the previous?
+                    nl = spans[-1][2]
+                    nl.append((n, l))
+                    ol += l
+                    spans[-1] = (oo, ol, nl)
+                else:
+                    oo = o
+                    ol = l
+                    spans.append((oo, ol, [(n, l)]))
 
-        # read spans in, divide up chunks
-        chunks = {}
-        for span in spans:
-            # we reopen the file for each span to make http happy for now
-            f = self.opener(self.datafile)
-            f.seek(span[0])
-            data = f.read(span[1])
+            # read spans in, divide up chunks
+            chunks = {}
+            for span in spans:
+                # we reopen the file for each span to make http happy for now
+                f = self.opener(self.datafile)
+                f.seek(span[0])
+                data = f.read(span[1])
 
-            # divide up the span
-            pos = 0
-            for r, l in span[2]:
-                chunks[r] = decompress(data[pos: pos + l])
-                pos += l
+                # divide up the span
+                pos = 0
+                for r, l in span[2]:
+                    chunks[r] = decompress(data[pos: pos + l])
+                    pos += l
+            def chunk(r):
+                return chunks[r]
 
         # helper to reconstruct intermediate versions
         def construct(text, base, rev):
-            bins = [chunks[r] for r in xrange(base + 1, rev + 1)]
+            bins = [chunk(r) for r in xrange(base + 1, rev + 1)]
             return mdiff.patches(text, bins)
 
         # build deltas
-        deltas = []
         for d in xrange(0, len(revs) - 1):
             a, b = revs[d], revs[d + 1]
             n = self.node(b)
@@ -735,7 +746,7 @@
             if a + 1 != b or self.base(b) == b:
                 if a >= 0:
                     base = self.base(a)
-                    ta = chunks[self.base(a)]
+                    ta = chunk(self.base(a))
                     ta = construct(ta, base, a)
                 else:
                     ta = ""
@@ -745,11 +756,11 @@
                     base = a
                     tb = ta
                 else:
-                    tb = chunks[self.base(b)]
+                    tb = chunk(self.base(b))
                 tb = construct(tb, base, b)
                 d = self.diff(ta, tb)
             else:
-                d = chunks[b]
+                d = chunk(b)
 
             p = self.parents(n)
             meta = n + p[0] + p[1] + lookup(n)
--- a/mercurial/statichttprepo.py	Tue Dec 06 14:10:38 2005 +0100
+++ b/mercurial/statichttprepo.py	Thu Dec 08 15:12:02 2005 +0100
@@ -31,10 +31,12 @@
         self.path = (path + "/.hg")
         self.ui = ui
         self.opener = opener(self.path)
-        self.manifest = manifest.manifest(self.opener)
-        self.changelog = changelog.changelog(self.opener)
+        self.manifest = manifest.manifest(self.opener, local=self.local())
+        self.changelog = changelog.changelog(self.opener, local=self.local())
         self.tagscache = None
         self.nodetagscache = None
+        self.encodepats = None
+        self.decodepats = None
 
     def dev(self):
         return -1