Mercurial > nomad > experimental
changeset 1220:90b42fb22d6a
objstore: add a page-based caching layer
Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author | Josef 'Jeff' Sipek <jeffpc@josefsipek.net> |
---|---|
date | Tue, 14 Apr 2020 21:41:38 -0400 |
parents | c1d0d4b7d8d5 |
children | 52273821b9f2 |
files | src/objstore/CMakeLists.txt src/objstore/cache.c src/objstore/include/nomad/objstore_backend.h src/objstore/obj.c src/objstore/objstore.c src/objstore/objstore_impl.h |
diffstat | 6 files changed, 308 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/src/objstore/CMakeLists.txt Fri Apr 17 15:42:29 2020 -0400 +++ b/src/objstore/CMakeLists.txt Tue Apr 14 21:41:38 2020 -0400 @@ -21,6 +21,7 @@ # add_library(nomad_objstore SHARED + cache.c dirblock.c dirent.c dirent_target_packing.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/objstore/cache.c Tue Apr 14 21:41:38 2020 -0400 @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <jeffpc/list.h> +#include <jeffpc/rbtree.h> + +#include "objstore_impl.h" + +static size_t npages; +static struct page *pages; + +/* internal lists */ +static struct list free_pages; /* fully allocated */ +static struct list unallocated_pages; /* missing ->ptr */ + +int page_cache_init(size_t max_size) +{ + size_t i; + + npages = MAX(max_size / PAGE_SIZE, 1); + + pages = calloc(npages, sizeof(struct page)); + if (!pages) + return -ENOMEM; + + list_create(&free_pages, sizeof(struct page), + offsetof(struct page, pages)); + list_create(&unallocated_pages, sizeof(struct page), + offsetof(struct page, pages)); + + for (i = 0; i < npages; i++) + list_insert_tail(&unallocated_pages, &pages[i]); + + return 0; +} + +void page_cache_free(void) +{ + size_t i; + + for (i = 0; i < npages; i++) { + ASSERT(!pages[i].inuse); + + free(pages[i].ptr); + } + + free(pages); +} + +static int page_cmp(const void *va, const void *vb) +{ + const struct page *a = va; + const struct page *b = vb; + + if (a->pgno < b->pgno) + return -1; + if (a->pgno > b->pgno) + return +1; + return 0; +} + +void page_cache_init_objver(struct objver *ver) +{ + rb_create(&ver->pages, page_cmp, sizeof(struct page), + offsetof(struct page, node)); +} + +void page_cache_deinit_objver(struct objver *ver) +{ + ASSERT0(!rb_numnodes(&ver->pages)); + + rb_destroy(&ver->pages); +} + +static struct page *get_free_page(void) +{ + struct page *page; + + page = list_head(&free_pages); + if (page) { + list_remove(&free_pages, page); + goto out; + } + + page = list_head(&unallocated_pages); + if (page) { + page->ptr = malloc(PAGE_SIZE); + if (page->ptr) { + list_remove(&unallocated_pages, page); + goto out; + } + } + + panic("Failed to allocate a new page! Out of memory?"); + +out: + VERIFY3P(page->ptr, !=, NULL); + + return page; +} + +static void free_page(struct page *page) +{ + if (page->ptr) + list_insert_head(&free_pages, page); + else + list_insert_head(&unallocated_pages, page); +} + +struct page *page_lookup(struct objver *ver, uint64_t pgno, int flags) +{ + struct rb_cookie cookie; + struct page key = { + .pgno = pgno, + }; + struct page *page; + + page = rb_find(&ver->pages, &key, &cookie); + if (page) { + page_lock(page); + + return page; + } + + if (!(flags & PG_ALLOC)) + return ERR_PTR(-ENOENT); + + page = get_free_page(); + if (IS_ERR(page)) + return page; + + page->objver = ver; + page->pgno = pgno; + VERIFY(page->ptr); + page->inuse = true; + page->filled = false; + page->dirty = false; + + if (flags & PG_FILL) { + /* + * We need to fill the page in one of two different ways + * depending on whether the page comes before or ofter the + * offset of the shortest truncation (or EOF): + * + * (1) all pages beyond the shortest truncation must be + * zero filled + * + * (2) all pages before the shortest truncation must be + * read in from disk + * + * The shortest truncation point takes care of all cases - + * including those where we extend the file after truncating + * it. + */ + + int ret; + + if (pgno < (p2roundup(ver->txn.min_size, PAGE_SIZE) / PAGE_SIZE)) { + /* existed before */ + + struct objver *readver; + + /* + * We have to figure out if the objver we are trying + * to read from is in a transaction that has a cow + * for it. If it does, we have to issue the read to + * the previous version since the new version + * doesn't exist on disk until the transaction + * commits. + * + * Since cow can only happen on unqualified opens, + * we check it for the cow-in-progress previous + * objver pointer. + * + * TODO: Is this right? How will that work when + * trying to merge two heads of an object? + */ + + if (ver->open[false]->cow.prev_ver) + readver = ver->open[false]->cow.prev_ver; + else + readver = ver; + + ret = ver->obj->ops->read_page(readver, page->ptr, pgno); + if (ret) { + free_page(page); + return ERR_PTR(ret); + } + } else { + /* completely new */ + memset(page->ptr, 0, PAGE_SIZE); + } + + page->filled = true; + } + + VERIFY3P(rb_insert_here(&ver->pages, page, &cookie), ==, NULL); + + return page; +} + +void page_inval_range(struct objver *ver, uint64_t pgno, size_t pgcnt) +{ + const uint64_t first_pgno = pgno; + const uint64_t last_pgno = pgno + pgcnt - 1; + struct rb_cookie cookie; + struct page key = { + .pgno = first_pgno, + }; + struct page *page; + + if (!pgcnt) + return; + + page = rb_find(&ver->pages, &key, &cookie); + if (!page) + page = rb_nearest_gt(&ver->pages, &cookie); + + for (;;) { + struct page *next; + + if (!page || (page->pgno > last_pgno)) + break; + + ASSERT(page->inuse); + ASSERT(!page->dirty); /* catch data loss */ + ASSERT(page->ptr); + + next = rb_next(&ver->pages, page); + + rb_remove(&ver->pages, page); + + page->inuse = false; + + list_insert_head(&free_pages, page); + + page = next; + } +}
--- a/src/objstore/include/nomad/objstore_backend.h Fri Apr 17 15:42:29 2020 -0400 +++ b/src/objstore/include/nomad/objstore_backend.h Tue Apr 14 21:41:38 2020 -0400 @@ -104,6 +104,7 @@ struct obj *obj; struct rb_node all_node; /* all versions */ struct rb_node head_node; /* head versions */ + struct rb_tree pages; /* cached pages for this version */ }; struct objstore_open_obj_info {
--- a/src/objstore/obj.c Fri Apr 17 15:42:29 2020 -0400 +++ b/src/objstore/obj.c Tue Apr 14 21:41:38 2020 -0400 @@ -117,6 +117,8 @@ ver->txn.min_size = 0; ver->obj = NULL; + page_cache_init_objver(ver); + return ver; } @@ -134,6 +136,8 @@ for (i = 0; i < ARRAY_LEN(ver->open); i++) ASSERT3P(ver->open[i], ==, NULL); + page_cache_deinit_objver(ver); + mem_cache_free(objver_cache, ver); }
--- a/src/objstore/objstore.c Fri Apr 17 15:42:29 2020 -0400 +++ b/src/objstore/objstore.c Tue Apr 14 21:41:38 2020 -0400 @@ -118,14 +118,19 @@ list_create(&backends, sizeof(struct backend), offsetof(struct backend, node)); + /* TODO: turn the size argument into a config knob */ + ret = page_cache_init(512 * 1024 * 1024); + if (ret) + return ret; + ret = vdev_init(); if (ret) - return ret; + goto err; obj_cache = mem_cache_create("obj", sizeof(struct obj), 0); if (IS_ERR(obj_cache)) { ret = PTR_ERR(obj_cache); - goto err; + goto err_vdev; } objver_cache = mem_cache_create("objver", sizeof(struct objver), 0); @@ -164,8 +169,11 @@ err_obj: mem_cache_destroy(obj_cache); +err_vdev: + vdev_fini(); + err: - vdev_fini(); + page_cache_free(); return ret; }
--- a/src/objstore/objstore_impl.h Fri Apr 17 15:42:29 2020 -0400 +++ b/src/objstore/objstore_impl.h Tue Apr 14 21:41:38 2020 -0400 @@ -36,6 +36,24 @@ void *module; }; +enum pg_find_flags { + PG_ALLOC = 0x01, + PG_FILL = 0x02, +}; + +struct page { + union { + struct rb_node node; /* in-use: objver pages tree */ + struct list pages; /* free: internal page list */ + }; + struct objver *objver; + uint64_t pgno; + uint8_t *ptr; + bool inuse:1; /* used by an object version */ + bool filled:1; /* contains data */ + bool dirty:1; /* must be written back */ +}; + /* internal backend management */ extern struct backend *backend_lookup(const char *name); @@ -116,6 +134,21 @@ return err; } +/* + * page-based cache + */ +extern int page_cache_init(size_t max_size); +extern void page_cache_free(void); +extern void page_cache_init_objver(struct objver *ver); +extern void page_cache_deinit_objver(struct objver *ver); + +extern struct page *page_lookup(struct objver *ver, uint64_t pgno, int flags); +extern void page_inval_range(struct objver *ver, uint64_t pgno, size_t pgcnt); + +/* for now, we rely on the obj lock */ +#define page_lock(page) do { } while (0) +#define page_unlock(page) do { } while (0) + static inline void reset_objver_txn_info(struct txn *txn, struct objver *ver) { uint64_t cookie = txn ? txn->id : UINT64_MAX;