view hgext/gitserve.py @ 3465:e07147ab4de4

read rev list from the argument
author Josef "Jeff" Sipek <jeffpc@josefsipek.net>
date Sat, 21 Oct 2006 18:58:32 -0400
parents ec4d38e2777d
children 7d0b4aa72322
line wrap: on
line source

# gitserve.py - run a git-compatible server
#
# Copyright 2006 Josef "Jeff" Sipek <jeffpc@josefsipek.net>
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

from mercurial.demandload import *
demandload(globals(), 'sys signal os socket sha zlib')
demandload(globals(), 'mercurial:hg,fancyopts,commands,ui,node')

# Canned git pack (the header declares version 2 and 9 objects) kept as
# test data.  Neither name is referenced elsewhere in this file.
# NOTE(review): initial_hash is presumably the id of the head commit stored
# in `initial` -- confirm before relying on it.
# The literal must stay on ONE physical line: breaking it in the middle of
# an escape (e.g. after "\x78\") silently turns the zlib header 0x78 0x9c
# into the four characters "xx9c" and corrupts the pack.
initial_hash = "be0d3f608eac6a979da48a9c1774ede9aa13cb25"
initial = "\x50\x41\x43\x4b\x00\x00\x00\x02\x00\x00\x00\x09\x9f\x0c\x78\x9c\x95\xcb\x3d\x0e\xc2\x30\x0c\x40\xe1\x3d\xa7\xc8\x08\x03\xc8\x89\x9d\xa6\x91\x10\x62\xe6\x16\x8e\xe3\x50\x0a\xfd\x51\x09\xf7\xa7\x57\x60\x79\xc3\x27\xbd\xb6\xa9\x5a\xd5\xcc\xca\x5a\x5c\x57\x8a\x47\x80\x88\xd1\xfb\x2a\x21\xba\x3e\xfa\x5e\x05\x52\xaa\x98\x89\xcc\xca\x9b\xce\xcd\x42\x8f\x01\x91\x80\x50\x81\xc4\xfb\xdc\xf9\x4e\x42\x09\xbc\x63\x22\x91\x92\x19\x5d\x89\x62\xf8\xdb\x86\x65\xb3\x77\xad\xd5\x5e\xc6\xbd\xab\xdc\x32\xb7\xf7\x70\x3e\xcc\xcb\xac\xc7\xab\x75\x2e\x04\x4a\x89\x30\xd9\x13\x10\x80\x91\x65\x9a\x9e\xad\xe9\x7f\x97\x79\x3d\xc6\x4f\x1d\xcc\x0f\x85\xde\x3b\xf5\x9d\x0c\x78\x9c\x95\xcb\x3b\x0e\xc2\x30\x0c\x00\xd0\x3d\xa7\xc8\x08\x03\xc8\x4e\xec\xa4\x91\x10\x42\x8c\xdc\xc2\x75\x1c\x15\x44\x3f\xaa\xc2\xfd\xe1\x0a\x2c\x6f\x7b\x7d\x37\xf3\x12\x42\x51\xe4\x61\x8c\x95\x39\x81\x2a\x91\xd4\x58\x4d\x53\x6b\x5c\x31\x0a\x65\x43\xd4\xe6\x36\xd9\x6d\xe9\x5e\x32\x13\x88\x72\x00\x8e\x39\x87\x94\x41\x07\x1e\x5a\xb0\x26\x5c\x09\x41\x52\x46\x1b\x4b\x74\xf2\xe9\xd3\xba\xfb\x87\xb5\xe6\x2f\xaf\x9f\x9b\xde\x46\xe9\xef\xe9\x7c\x58\xd6\xc5\x8e\x57\x8f\xc8\x4c\xa5\x10\x14\x7f\x02\x02\x70\xba\xce\xf3\xb3\x77\xfb\x6f\xb9\xfb\x5b\x26\xf7\x05\x2d\xed\x3b\x21\x97\x0a\x78\x9c\x95\xcb\x31\x0e\xc2\x30\x0c\x00\xc0\x3d\xaf\xf0\x08\x03\xc8\x0d\x4e\x14\x4b\x15\x62\x85\x5f\xc4\xa9\xa3\x06\xb5\x29\xaa\xcc\xff\x41\xe2\x05\x2c\xb7\x9d\xed\xaa\x40\x92\x7c\x98\x4a\x24\x5f\x24\xaa\x70\xc6\x88\x1a\x48\x6a\x9a\x22\xfb\x94\xaa\xa8\x12\x23\xb9\xfc\xb6\x79\xdb\xe1\xa1\xb5\xc2\xf8\xfc\xfa\x2a\x37\xc9\xb6\xcc\xe7\x43\xdf\xba\x1e\xaf\x30\x0c\x21\x10\xf3\x85\x11\x4e\x48\x88\xae\x6c\xeb\xda\xcc\xf4\xbf\xe5\xee\xbd\x59\xcb\x0b\xfc\xba\xfb\x00\x78\xa8\x31\xc5\xae\x03\x78\x9c\x33\x34\x30\x30\x33\x31\x51\x48\x4a\x2c\x62\xd8\x91\x94\xe3\xf3\x5f\xc3\x33\xc9\x7f\x5b\x7d\xfb\x59\xae\xeb\xda\x62\x66\x85\x6b\x0d\x21\xd2\x69\xf9\xf9\x0c\x82\x13\x55\xaa\x73\x67\x29\xe4\x67\x2f\xbe\xb1\xa6\xf2\x51\x2e\x8b\xec\x8d\x4c\x47\x00\xa1\x0a\x16\xde\x32\x78\x9c\x33\xe1\x02\x00\x00\x74\x00\x3f\x34\x78\x9c\x33\xe4\x32\xe2\x02\x00\x01\x54\x00\x78\xaf\x01\x78\x9c\x33\x34\x30\x30\x33\x31\x51\x48\xcb\xcf\x67\xb8\xc0\x32\xf1\x6f\x5d\xf4\xb6\x5f\x1a\x47\xc5\x17\xec\x36\xda\xb1\x95\x2d\x78\xae\x2f\x00\xb3\xe7\x0c\xc0\x32\x78\x9c\x33\xe4\x02\x00\x00\x6e\x00\x3c\x20\x78\x9c\x03\x00\x00\x00\x00\x01\xa9\xa2\x06\xae\x1a\x3a\xc0\x6d\x3f\x86\xa9\x3e\xe6\x91\x35\x78\x43\x36\x96\x3d"
# Canned git pack (the header declares version 2 and 3 objects) used as the
# test payload: partial_hash is the id advertised to clients for both HEAD
# and refs/heads/master, and `partial` is the raw pack that service() sends
# back to every client regardless of what it asked for.
partial_hash = "f9368987425b1d2b3084d7f3dc4846dd93ea60de"
partial = "\x50\x41\x43\x4b\x00\x00\x00\x02\x00\x00\x00\x03\x9d\x0c\x78\x9c\x95\xcb\x3b\x0e\xc2\x30\x0c\x00\xd0\x3d\xa7\xc8\x08\x03\xc8\x6e\x3e\x76\x24\x84\x98\xb9\x85\x93\x38\x2a\x85\x7e\x54\xc2\xfd\xe1\x0a\x2c\x6f\x7b\x7d\x57\xb5\x02\x4e\x45\xab\x73\x50\x20\x43\x2d\x03\x44\x42\x6a\xd4\x42\xe6\x4c\xe8\x95\x63\x05\x66\x6f\x36\xd9\x75\xe9\x36\x2b\x54\xd7\x22\xb0\x4a\x89\x92\x28\x55\xf1\x2c\xa9\x20\x91\xd7\xaa\x49\x04\x5d\xc9\x43\x30\xf2\xe9\xe3\xba\xdb\xbb\xb6\x66\x2f\xd3\xcf\xad\xdc\xb2\xf4\xd7\x78\x3e\x2c\xeb\xa2\xc7\xab\x45\x0c\x21\x20\x70\x04\x7b\x02\x0f\x60\xca\x3a\xcf\x8f\xde\xf5\xbf\x65\x9e\xd3\xf8\x36\x5f\x8f\x05\x3c\x06\xae\x03\x78\x9c\x33\x34\x30\x30\x33\x31\x51\x48\x4a\x2c\x62\xd8\x91\x94\xe3\xf3\x5f\xc3\x33\xc9\x7f\x5b\x7d\xfb\x59\xae\xeb\xda\x62\x66\x85\x6b\x0d\x21\xd2\x69\xf9\xf9\x0c\x8c\xcf\xe7\x18\xad\x38\x39\x27\xb4\x9e\x3d\xfc\x56\xcd\xb6\x6b\xd1\x26\x22\x6e\xb9\x00\xad\x7d\x17\x37\x36\x78\x9c\x33\xe4\x32\xe2\x32\xe6\x02\x00\x02\xb4\x00\xb5\x5c\xfb\xea\x0c\x9c\xd3\x0f\xbb\x69\x08\x88\x28\x3b\x93\x5d\xc0\xa2\xa0\x36\xe8"

def safe_read(sock):
    """Read one git pkt-line from sock and return its payload.

    A pkt-line starts with four hex digits giving the length of the whole
    packet, those four digits included.  A length of zero is the flush
    packet; it is reported to the caller as the string "flush" (so a real
    payload consisting of exactly "flush" cannot be told apart from it).

    Raises IOError when the peer closes the connection before a complete
    packet has arrived, and ValueError when the length field is malformed.
    """
    def do_read(sock, rlen):
        # recv() may hand back fewer bytes than requested, so keep asking
        # until the whole field has arrived
        buf = ""
        while len(buf) < rlen:
            rbuf = sock.recv(rlen - len(buf))
            if not rbuf:
                raise IOError("connection closed after %d of %d bytes"
                              % (len(buf), rlen))
            buf += rbuf

        print("do_read: got %d bytes '%s'" % (len(buf), buf))
        return buf

    def unhex(s):
        # strict parser: only 0-9, a-f and A-F are hex digits
        r = 0
        for c in s:
            digit = "0123456789abcdef".find(c.lower())
            if digit < 0:
                print("Unknown hex digit '%s' (%d, %02x)" % (c, ord(c), ord(c)))
                raise ValueError("invalid hex digit in pkt-line length")
            r = (r << 4) | digit
        return r

    size = unhex(do_read(sock, 4))
    if size == 0:
        # this is a flush command, fake a nicer way for layers above us
        return "flush"

    if size < 4:
        # the length counts its own four bytes, so 1-3 can never be valid
        raise ValueError("invalid pkt-line length %d" % (size,))

    size -= 4 # size includes the size field

    buf = do_read(sock, size)
    print("received %d bytes: '%s'" % (size, buf))

    return buf

def safe_write(sock, buf):
    """Send buf to sock framed as one git pkt-line.

    The payload is prefixed with four hex digits holding the length of the
    whole packet (payload plus the four digits themselves).

    Raises ValueError when the payload is too long for the four-digit
    length field.
    """
    pktlen = len(buf) + 4
    if pktlen > 0xffff:
        raise ValueError("pkt-line payload too long: %d bytes" % (len(buf),))

    data = "%04x%s" % (pktlen, buf)

    print("sending  %d bytes: '%s'" % (len(data), data))
    # send() may transmit only part of the data; sendall() loops until done
    sock.sendall(data)

def raw_write(sock, buf):
    "send buf to sock as-is, without any pkt-line framing"
    print("sending %d bytes of raw data" % (len(buf),))
    # send() may transmit only part of the data; sendall() loops until done
    sock.sendall(buf)

def send_flush(sock):
    "send a flush packet"
    # a flush packet is the bare length field "0000" with no payload, so it
    # goes out unframed
    flush_pkt = "0000"
    raw_write(sock, flush_pkt)

def send_head_ref(sock):
    "advertise HEAD (pointing at partial_hash) plus the capability list"

    # the capabilities ride on the first advertised ref, after a NUL byte
    capabilities = " ".join(["multi_ack", "thin-pack"])
    line = "%s %s\0%s\n" % (partial_hash, "HEAD", capabilities)

    safe_write(sock, line)

def send_all_refs(sock):
    "advertise the branch refs; only refs/heads/master (at partial_hash) for now"

    branch = "master"
    line = "%s refs/heads/%s\n" % (partial_hash, branch)

    safe_write(sock, line)

def service(ui, repo, sock):
    """Service a client request

    Handles one connection: reads the greeting, advertises the refs, then
    collects the client's "want" and "have" lines and answers with a pack.
    Only the "git-upload-pack" request is understood; the requested
    repository path is ignored and the canned test pack is always sent.

    sock is closed before returning, whether the exchange succeeded or
    failed.  IOError (peer went away) and ValueError (malformed request)
    propagate to the caller.
    """

    def get_wants(sock):
        "get the list of what's wanted"
        wants = []
        flags = {"multi_ack": False, "thin-pack": False, "side-band": False}
        while True:
            buf = safe_read(sock)
            if buf == "flush":
                break

            # "want <hash>[ <capability> ...]"
            fields = buf.strip().split(" ", 2)
            if len(fields) < 2 or fields[0] != "want":
                raise ValueError("expected a want line, got '%s'" % (buf,))

            wanted = fields[1]
            if wanted not in wants:
                wants.append(wanted)

            if len(fields) > 2:
                # split() without an argument never yields empty strings,
                # so no bogus "" flag is recorded
                for f in fields[2].split():
                    flags[f] = True

        print("wants: %s\nflags: %s" % (str(wants), str(flags)))
        return (wants, flags)

    def get_haves(sock, flags):
        "get the list of what the client has"
        haves = []
        while True:
            buf = safe_read(sock)
            if buf == "flush":
                # a flush only ends one batch of haves; keep reading until
                # the client says "done"
                if not haves or flags["multi_ack"]:
                    safe_write(sock, "NAK\n")
                continue

            fields = buf.strip().split(" ", 1)
            cmd = fields[0]
            if len(fields) > 1:
                have = fields[1]
            else:
                have = ""

            if cmd == "have":
                # FIXME: if we know that they have this commit object, don't
                # add it to the list, if we didn't know that they had it,
                # add it, and mark all its parents so that we know that
                # they have them if we encounter their hashes
                haves.append(have)
                if flags["multi_ack"] or len(haves) == 1:
                    if flags["multi_ack"]:
                        suffix = " continue"
                    else:
                        suffix = ""
                    safe_write(sock, "ACK %s%s\n" % (have, suffix))

            elif cmd == "done":
                if not haves:
                    safe_write(sock, "NAK\n")
                elif flags["multi_ack"]:
                    safe_write(sock, "ACK %s\n" % (haves[-1],))
                break

            else:
                raise ValueError("unexpected command '%s'" % (cmd,))

        print("haves: %s" % (str(haves),))
        return haves

    def create_pack(sock, want, have, flags):
        "create the pack, and send it to the client"

        # test pack
        raw_write(sock, partial)

    try:
        # first let's wait for a greeting
        buf = safe_read(sock)
        (cmd, path) = buf.split(" ", 1)

        if cmd != "git-upload-pack":
            print("Invalid git protocol!")
            return

        print("We are supposed to use repo at '%s' - ignoring" % (path,))

        # tell the client what we have
        send_head_ref(sock)
        send_all_refs(sock)
        send_flush(sock)

        # upload pack code
        (want, flags) = get_wants(sock)
        if want:
            have = get_haves(sock, flags)
            create_pack(sock, want, have, flags)
    finally:
        # never leak the client socket, even on a protocol error
        sock.close()

    print("Closed connection.")

def gitserve(ui, repo):
    """Serve repository via git protocol

    Listens on 0.0.0.0:9418 and services clients one at a time until
    interrupted with Ctrl-C.  A client that disconnects early or sends a
    malformed request is dropped with a message; it does not stop the
    server.
    """

    listen_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        try:
            listen_sock.bind(("0.0.0.0", 9418))
            listen_sock.listen(10) # backlog of 10 connections

            while True:
                sock, addr = listen_sock.accept()
                try:
                    try:
                        service(ui, repo, sock)
                    except (socket.error, IOError, ValueError):
                        print("dropping connection from %s: %s"
                              % (addr, sys.exc_info()[1]))
                finally:
                    # closing an already closed socket is harmless
                    sock.close()
        except KeyboardInterrupt:
            # we should shutdown now
            pass
    finally:
        listen_sock.close()

def hash_object(type, str):
    """Return the hex SHA-1 of a git object.

    type should be one of "blob", "tree", "commit", "tag"; str is the
    object's content.  The digest covers the header "<type> <length>",
    a NUL byte, and then the content.
    """
    payload = "%s %d\0%s" % (type, len(str), str)
    digest = sha.new(payload)
    return digest.hexdigest()

def compress_object(type, s):
    """Return the bytes of the object file for an object.

    type is a string identifying the type of the object and s holds the
    object's data; the result is the zlib-compressed "<type> <length>"
    header, a NUL byte, and the data.
    """
    payload = "%s %d\0%s" % (type, len(s), s)
    return zlib.compress(payload)

def write_object(name, data):
    """Store data as the loose object called name under .git/objects.

    name is the object's 40-character hex id: its first two characters
    select the directory and the rest is the file name.  data is written
    verbatim, so it must already be the compressed object file content.

    Raises OSError when the directory cannot be created and IOError when
    the file cannot be written.
    """
    objdir = ".git/objects/%s" % (name[0:2],)
    try:
        os.mkdir(objdir)
    except OSError:
        # an already existing directory is the normal case; anything else
        # (missing .git/objects, permissions, ...) is a real error
        if not os.path.isdir(objdir):
            raise

    # the content is zlib output, so the file must be opened in binary mode
    f = open("%s/%s" % (objdir, name[2:]), "wb")
    try:
        f.write(data)
    finally:
        f.close()

def make_file_object(repo, name, hghash):
    """Write revision hghash of file name as a git blob; return the blob id.

    The content is read from the repository's filelog for name.
    """
    contents = repo.file(name).read(hghash)
    blobid = hash_object("blob", contents)
    packed = compress_object("blob", contents)
    write_object(blobid, packed)
    print("making blob %s (%s)" % (blobid, name))
    return blobid

def make_tree_object(treedesc):
    "write treedesc (the raw tree entries) as a git tree; return the tree id"
    treeid = hash_object("tree", treedesc)
    packed = compress_object("tree", treedesc)
    write_object(treeid, packed)
    print("making tree %s" % (treeid,))
    return treeid

def make_commit_object(tree, author, committer, dts, hgmsg, parents):
    """Write a git commit object and return its id.

    tree is the hex id of the commit's tree, dts the "<seconds> <zone>"
    date string, hgmsg the commit message and parents the mercurial parent
    nodes; each non-null parent is translated through h2g, so it must have
    been converted already.  committer, when given, is used verbatim as the
    committer line; otherwise the author and dts are reused.
    """
    pieces = ["tree %s\n" % (tree,)]
    for p in parents:
        if p != node.nullid:
            pieces.append("parent %s\n" % (h2g[p],))
    pieces.append("author %s %s\n" % (author, dts))

    if not committer:
        print("faking committer: \"%s %s\"" % (author, dts))
        pieces.append("committer %s %s\n\n" % (author, dts))
    else:
        print("got commiter: \"%s\"" % (committer,))
        pieces.append("committer %s\n\n" % (committer, ))

    pieces.append("%s" % (hgmsg,))
    body = "".join(pieces)

    commitid = hash_object("commit", body)
    write_object(commitid, compress_object("commit", body))
    print("making commit %s" % (commitid,))
    return commitid

# Translation tables between the two systems, filled in by gitobject() and
# persisted in the "map" file:
#   g2h: git commit id (hex string) -> mercurial changeset node (binary)
#   h2g: mercurial changeset node (binary) -> git commit id (hex string)
g2h = {}
h2g = {}

def gitobject(ui, repo, rev, usemap=True):
    """convert a mercurial "object" to a git object and all its parents

    Writes the blobs, trees and the commit for changeset rev (the tip when
    rev is empty) as loose objects under .git/objects, after recursively
    converting every parent that has no git counterpart yet.  The
    hg <-> git commit mapping is recorded in g2h/h2g.

    usemap controls whether the mapping is loaded from and saved to the
    "map" file; the recursive calls for parents pass False so the file is
    only touched once, by the outermost call.
    """
    def tz(t):
        # A positive t is written with a "-" sign and anything else with
        # "+", and the sign is always present, e.g. "+0000".
        if t > 0:
            sign = "-"
        else:
            sign = "+"
        minutes = abs(int(t)) // 60
        return "%s%02d%02d" % (sign, minutes // 60, minutes % 60)

    def load_map():
        try:
            f = open("map", "r")
        except IOError:
            # no map file yet (first run): start with empty tables
            return
        try:
            for l in f:
                fields = l.split()
                if len(fields) != 2:
                    if fields:
                        print("ignoring malformed map line: %r" % (l,))
                    continue
                (g, h) = fields

                g2h[g] = node.bin(h)
                h2g[node.bin(h)] = g
        finally:
            f.close()

    def save_map():
        f = open("map", "w")
        try:
            for k in g2h:
                f.write("%s %s\n" % (k, node.hex(g2h[k])))
        finally:
            f.close()

    def buildtrees(dirs):
        # git requires tree entries sorted by name, with a directory
        # ordered as if its name ended in "/"
        entries = []
        for name in dirs:
            if isinstance(dirs[name], dict):
                entries.append((name + "/", name))
            else:
                entries.append((name, name))
        entries.sort()

        tree = ""
        for (sortname, name) in entries:
            if isinstance(dirs[name], dict):
                # a directory
                objid = node.bin(buildtrees(dirs[name]))
                mode = "40000"
            else:
                # a file
                # NOTE(review): every file gets mode 100644, so executable
                # bits and symlinks are not carried over
                objid = dirs[name]
                mode = "100644"

            tree += "%s %s\0%s" % (mode, name, objid)

        return make_tree_object(tree)

    ##

    if usemap:
        load_map()

    if not rev:
        r = repo.changelog.tip()
    else:
        print("looking up \"%s\"" % (node.hex(rev),))
        r = repo.lookup(rev)
        print("got: " + node.hex(r))

    # parents must exist on the git side before this commit can name them
    pars = repo.changelog.parents(r)
    for p in pars:
        if p != node.nullid and p not in h2g:
            gitobject(ui, repo, p, False)

    (hgmanifest, user, t, files, hgmsg, extra) = repo.changelog.read(r)

    dirs = {}
    mmap = repo.manifest.read(hgmanifest)
    for name in mmap.keys():
        githash = make_file_object(repo, name, mmap[name])

        pn = name.split("/")
        d = dirs
        for p in pn[:-1]:
            if p not in d:
                d[p] = {}
            # descend whether or not the directory was just created
            d = d[p]
        d[pn[-1]] = node.bin(githash)

    gittree = buildtrees(dirs)

    # a "committer: ..." line may be stored as the last line of the
    # message; strip it only when it is really there
    committer = None
    prefix = "committer: "
    msglines = hgmsg.split("\n")
    if len(msglines) >= 2 and msglines[-2].startswith(prefix):
        committer = msglines[-2][len(prefix):] or None
        hgmsg = "\n".join(msglines[:-2])

    gitcommit = make_commit_object(gittree, user, committer, "%d %s" % (int(t[0]), tz(t[1])), hgmsg, pars)

    h2g[r] = gitcommit
    g2h[gitcommit] = r

    if usemap:
        save_map()

# Object type codes as stored in the 3-bit type field of an object's header
# inside a pack file (see typelen() in gitpack).
OBJ_NONE = 0
OBJ_COMMIT = 1
OBJ_TREE = 2
OBJ_BLOB = 3
OBJ_TAG = 4
# 5/6 for future expansion
OBJ_DELTA = 7

class pack:
    """One loose git object that is to be written into a pack file.

    hash is the object's 40-character hex id.  The remaining attributes are
    filled in lazily by dump(), except offset, which the pack writer sets:
      strtype - object type as a string ("blob", "tree", "commit", "tag")
      reall   - length of the uncompressed object data
      clen    - length of the recompressed data as stored in the pack
      offset  - byte offset of the object inside the pack file
    """

    def __init__(self, hash):
        self.hash = hash
        self.__type = None
        self.strtype = None
        self.offset = None
        self.reall = None
        self.clen = None

    def __str__(self):
        return self.hash

    def __repr__(self):
        return "<pack %s>" % (self.hash,)

    def __cmp__(self, rhs):
        return cmp(self.hash, rhs.hash)

    def __lt__(self, rhs):
        # same ordering as __cmp__, for sort implementations that only use
        # rich comparisons
        return self.hash < rhs.hash

    def bin(self):
        "return the object id in binary form"
        return node.bin(self.hash)

    def dump(self):
        """Read the loose object and return its data compressed for a pack.

        The loose file holds a zlib stream of "<type> <length>", a NUL
        byte, and the data; the pack stores only the data, so the header
        is stripped and the rest recompressed.  Also records the object's
        type and sizes on self.

        Raises ValueError for an unknown object type.
        """
        path = ".git/objects/%s/%s" % (self.hash[0:2], self.hash[2:])
        # the object file is zlib output: read it in binary mode
        f = open(path, "rb")
        try:
            buf = f.read()
        finally:
            f.close()

        decbuf = zlib.decompress(buf)
        header, decbuf = decbuf.split("\0", 1)
        strtype, reall = header.split(" ")

        types = {
            "blob": OBJ_BLOB,
            "tree": OBJ_TREE,
            "commit": OBJ_COMMIT,
            "tag": OBJ_TAG,
        }
        if strtype not in types:
            raise ValueError("unknown object type '%s' in %s"
                             % (strtype, self.hash))

        buf = zlib.compress(decbuf)

        self.strtype = strtype
        self.__type = types[strtype]
        self.reall = int(reall)
        self.clen = len(buf)

        return buf

    def reallen(self):
        "return the length of the uncompressed object data"
        if self.reall is None:
            self.dump()

        # offset is formatted with %s because it may still be unset
        print("%s %s %d (%d) %s" % (self.hash, self.strtype, self.reall,
                                    self.clen, self.offset))
        return self.reall

    def type(self):
        "return the object's numeric type code (one of the OBJ_* values)"
        if self.__type is None:
            self.dump()

        return self.__type

def gitpack(ui, repo, objs=None):
    """Create a git pack file for a given range of revisions

    objs is a comma separated list of hex object ids whose loose objects
    exist under .git/objects.  The objects are written, in the order given,
    to "git.pack", and a matching index is written to "git.idx".  Without
    objs a message is printed and nothing is written.
    """

    def hexify(i):
        # despite the name: i as a 32-bit big-endian binary string
        def _h(i):
            return chr(i)
        return _h(i >> 24) + _h((i >> 16) & 0xff) + _h((i >> 8) & 0xff) + _h(i & 0xff)

    def hashed_write(h, f, d):
        # write d to f, feed it to the running checksum h, return its length
        h.update(d)
        f.write(d)
        return len(d)

    def typelen(t, l):
        # Object header: the first byte carries the 3-bit type and the low
        # 4 bits of the length, each later byte 7 more length bits; the top
        # bit of a byte says that another byte follows.  The first byte
        # always sets that bit here, so at least two bytes are produced.
        buf = chr(0x80 | (t & 0x7) << 4 | (l & 0xf))
        l >>= 4

        while l > 0x7f:
            buf += chr((l & 0x7f) | 0x80)
            l >>= 7

        if l or len(buf) == 1:
            buf += chr(l & 0x7f)

        return buf

    ##

    if not objs:
        print("Need a list of revs")
        return

    # empty items (e.g. from a trailing comma) are ignored
    objs = [pack(h.strip()) for h in objs.split(",") if h.strip()]
    print(objs)

    packsum = sha.sha()
    # pack and index are binary files
    packf = open("git.pack", "wb")
    try:
        hashed_write(packsum, packf, "PACK")                # magic
        hashed_write(packsum, packf, hexify(2))             # version
        hashed_write(packsum, packf, hexify(len(objs)))     # number of objects

        off = 12 # header is 12 bytes long
        for obj in objs:
            obj.offset = off

            rawobj = obj.dump()

            off += hashed_write(packsum, packf, typelen(obj.type(), obj.reallen()))
            off += hashed_write(packsum, packf, rawobj)

        # the trailer is the checksum of everything before it
        packhash = packsum.digest()
        packf.write(packhash)
    finally:
        packf.close()

    #
    # index file
    #
    idxsum = sha.sha()
    idx = open("git.idx", "wb")
    try:
        objs.sort()

        # header: 256 counters, entry i holding the number of objects whose
        # first id byte is <= i
        count = 0
        for i in range(0, 256):
            prefix = "%02x" % (i,)
            while count < len(objs) and str(objs[count]).startswith(prefix):
                count += 1
            hashed_write(idxsum, idx, hexify(count))

        # per object entries
        for obj in objs:
            hashed_write(idxsum, idx, hexify(obj.offset))
            hashed_write(idxsum, idx, obj.bin())

        hashed_write(idxsum, idx, packhash)
        idx.write(idxsum.digest())
    finally:
        idx.close()

# Command table: command name -> (function, option list, usage synopsis).
# Each option is (short name, long name, default, help text); the long
# names match the keyword parameters of the command functions.
cmdtable = {
    "gitserve":
    (gitserve,
     [],
    'hg gitserve'),
    "gitpack":
    (gitpack,
     [('o', 'objs', '', 'comma separated list of revisions to create a pack for')],
    'hg gitpack'),
    "gitobject":
    (gitobject,
     [('r', 'rev', '', 'revision to create objects for')],
    'hg gitobject'),
}

def reposetup(ui, repo):
    "no per-repository setup is performed"
    return None