changeset 1220:90b42fb22d6a

objstore: add a page-based caching layer

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
author Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
date Tue, 14 Apr 2020 21:41:38 -0400
parents c1d0d4b7d8d5
children 52273821b9f2
files src/objstore/CMakeLists.txt src/objstore/cache.c src/objstore/include/nomad/objstore_backend.h src/objstore/obj.c src/objstore/objstore.c src/objstore/objstore_impl.h
diffstat 6 files changed, 308 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/src/objstore/CMakeLists.txt	Fri Apr 17 15:42:29 2020 -0400
+++ b/src/objstore/CMakeLists.txt	Tue Apr 14 21:41:38 2020 -0400
@@ -21,6 +21,7 @@
 #
 
 add_library(nomad_objstore SHARED
+	cache.c
 	dirblock.c
 	dirent.c
 	dirent_target_packing.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/objstore/cache.c	Tue Apr 14 21:41:38 2020 -0400
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2020 Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <jeffpc/list.h>
+#include <jeffpc/rbtree.h>
+
+#include "objstore_impl.h"
+
+static size_t npages;
+static struct page *pages;
+
+/* internal lists */
+static struct list free_pages;	/* fully allocated */
+static struct list unallocated_pages; /* missing ->ptr */
+
+int page_cache_init(size_t max_size)
+{
+	size_t i;
+
+	npages = MAX(max_size / PAGE_SIZE, 1);
+
+	pages = calloc(npages, sizeof(struct page));
+	if (!pages)
+		return -ENOMEM;
+
+	list_create(&free_pages, sizeof(struct page),
+		    offsetof(struct page, pages));
+	list_create(&unallocated_pages, sizeof(struct page),
+		    offsetof(struct page, pages));
+
+	for (i = 0; i < npages; i++)
+		list_insert_tail(&unallocated_pages, &pages[i]);
+
+	return 0;
+}
+
+void page_cache_free(void)
+{
+	size_t i;
+
+	for (i = 0; i < npages; i++) {
+		ASSERT(!pages[i].inuse);
+
+		free(pages[i].ptr);
+	}
+
+	free(pages);
+}
+
+static int page_cmp(const void *va, const void *vb)
+{
+	const struct page *a = va;
+	const struct page *b = vb;
+
+	if (a->pgno < b->pgno)
+		return -1;
+	if (a->pgno > b->pgno)
+		return +1;
+	return 0;
+}
+
+void page_cache_init_objver(struct objver *ver)
+{
+	rb_create(&ver->pages, page_cmp, sizeof(struct page),
+		  offsetof(struct page, node));
+}
+
+void page_cache_deinit_objver(struct objver *ver)
+{
+	ASSERT0(rb_numnodes(&ver->pages));
+
+	rb_destroy(&ver->pages);
+}
+
+static struct page *get_free_page(void)
+{
+	struct page *page;
+
+	page = list_head(&free_pages);
+	if (page) {
+		list_remove(&free_pages, page);
+		goto out;
+	}
+
+	page = list_head(&unallocated_pages);
+	if (page) {
+		page->ptr = malloc(PAGE_SIZE);
+		if (page->ptr) {
+			list_remove(&unallocated_pages, page);
+			goto out;
+		}
+	}
+
+	panic("Failed to allocate a new page! Out of memory?");
+
+out:
+	VERIFY3P(page->ptr, !=, NULL);
+
+	return page;
+}
+
+static void free_page(struct page *page)
+{
+	if (page->ptr)
+		list_insert_head(&free_pages, page);
+	else
+		list_insert_head(&unallocated_pages, page);
+}
+
+struct page *page_lookup(struct objver *ver, uint64_t pgno, int flags)
+{
+	struct rb_cookie cookie;
+	struct page key = {
+		.pgno = pgno,
+	};
+	struct page *page;
+
+	page = rb_find(&ver->pages, &key, &cookie);
+	if (page) {
+		page_lock(page);
+
+		return page;
+	}
+
+	if (!(flags & PG_ALLOC))
+		return ERR_PTR(-ENOENT);
+
+	page = get_free_page();
+	if (IS_ERR(page))
+		return page;
+
+	page->objver = ver;
+	page->pgno = pgno;
+	VERIFY(page->ptr);
+	page->inuse = true;
+	page->filled = false;
+	page->dirty = false;
+
+	if (flags & PG_FILL) {
+		/*
+		 * We need to fill the page in one of two different ways
+		 * depending on whether the page comes before or after the
+		 * offset of the shortest truncation (or EOF):
+		 *
+		 *   (1) all pages beyond the shortest truncation must be
+		 *       zero filled
+		 *
+		 *   (2) all pages before the shortest truncation must be
+		 *       read in from disk
+		 *
+		 * The shortest truncation point takes care of all cases -
+		 * including those where we extend the file after truncating
+		 * it.
+		 */
+
+		int ret;
+
+		if (pgno < (p2roundup(ver->txn.min_size, PAGE_SIZE) / PAGE_SIZE)) {
+			/* existed before */
+
+			struct objver *readver;
+
+			/*
+			 * We have to figure out if the objver we are trying
+			 * to read from is in a transaction that has a cow
+			 * for it.  If it does, we have to issue the read to
+			 * the previous version since the new version
+			 * doesn't exist on disk until the transaction
+			 * commits.
+			 *
+			 * Since cow can only happen on unqualified opens,
+			 * we check it for the cow-in-progress previous
+			 * objver pointer.
+			 *
+			 * TODO: Is this right?  How will that work when
+			 * trying to merge two heads of an object?
+			 */
+
+			if (ver->open[false]->cow.prev_ver)
+				readver = ver->open[false]->cow.prev_ver;
+			else
+				readver = ver;
+
+			ret = ver->obj->ops->read_page(readver, page->ptr, pgno);
+			if (ret) {
+				free_page(page);
+				return ERR_PTR(ret);
+			}
+		} else {
+			/* completely new */
+			memset(page->ptr, 0, PAGE_SIZE);
+		}
+
+		page->filled = true;
+	}
+
+	VERIFY3P(rb_insert_here(&ver->pages, page, &cookie), ==, NULL);
+
+	return page;
+}
+
+void page_inval_range(struct objver *ver, uint64_t pgno, size_t pgcnt)
+{
+	const uint64_t first_pgno = pgno;
+	const uint64_t last_pgno = pgno + pgcnt - 1;
+	struct rb_cookie cookie;
+	struct page key = {
+		.pgno = first_pgno,
+	};
+	struct page *page;
+
+	if (!pgcnt)
+		return;
+
+	page = rb_find(&ver->pages, &key, &cookie);
+	if (!page)
+		page = rb_nearest_gt(&ver->pages, &cookie);
+
+	for (;;) {
+		struct page *next;
+
+		if (!page || (page->pgno > last_pgno))
+			break;
+
+		ASSERT(page->inuse);
+		ASSERT(!page->dirty); /* catch data loss */
+		ASSERT(page->ptr);
+
+		next = rb_next(&ver->pages, page);
+
+		rb_remove(&ver->pages, page);
+
+		page->inuse = false;
+
+		list_insert_head(&free_pages, page);
+
+		page = next;
+	}
+}
--- a/src/objstore/include/nomad/objstore_backend.h	Fri Apr 17 15:42:29 2020 -0400
+++ b/src/objstore/include/nomad/objstore_backend.h	Tue Apr 14 21:41:38 2020 -0400
@@ -104,6 +104,7 @@
 	struct obj *obj;
 	struct rb_node all_node;	/* all versions */
 	struct rb_node head_node;	/* head versions */
+	struct rb_tree pages;		/* cached pages for this version */
 };
 
 struct objstore_open_obj_info {
--- a/src/objstore/obj.c	Fri Apr 17 15:42:29 2020 -0400
+++ b/src/objstore/obj.c	Tue Apr 14 21:41:38 2020 -0400
@@ -117,6 +117,8 @@
 	ver->txn.min_size = 0;
 	ver->obj = NULL;
 
+	page_cache_init_objver(ver);
+
 	return ver;
 }
 
@@ -134,6 +136,8 @@
 	for (i = 0; i < ARRAY_LEN(ver->open); i++)
 		ASSERT3P(ver->open[i], ==, NULL);
 
+	page_cache_deinit_objver(ver);
+
 	mem_cache_free(objver_cache, ver);
 }
 
--- a/src/objstore/objstore.c	Fri Apr 17 15:42:29 2020 -0400
+++ b/src/objstore/objstore.c	Tue Apr 14 21:41:38 2020 -0400
@@ -118,14 +118,19 @@
 	list_create(&backends, sizeof(struct backend),
 		    offsetof(struct backend, node));
 
+	/* TODO: turn the size argument into a config knob */
+	ret = page_cache_init(512 * 1024 * 1024);
+	if (ret)
+		return ret;
+
 	ret = vdev_init();
 	if (ret)
-		return ret;
+		goto err;
 
 	obj_cache = mem_cache_create("obj", sizeof(struct obj), 0);
 	if (IS_ERR(obj_cache)) {
 		ret = PTR_ERR(obj_cache);
-		goto err;
+		goto err_vdev;
 	}
 
 	objver_cache = mem_cache_create("objver", sizeof(struct objver), 0);
@@ -164,8 +169,11 @@
 err_obj:
 	mem_cache_destroy(obj_cache);
 
+err_vdev:
+	vdev_fini();
+
 err:
-	vdev_fini();
+	page_cache_free();
 
 	return ret;
 }
--- a/src/objstore/objstore_impl.h	Fri Apr 17 15:42:29 2020 -0400
+++ b/src/objstore/objstore_impl.h	Tue Apr 14 21:41:38 2020 -0400
@@ -36,6 +36,24 @@
 	void *module;
 };
 
+enum pg_find_flags {
+	PG_ALLOC = 0x01,
+	PG_FILL = 0x02,
+};
+
+struct page {
+	union {
+		struct rb_node node;	/* in-use: objver pages tree */
+		struct list pages;	/* free: internal page list */
+	};
+	struct objver *objver;
+	uint64_t pgno;
+	uint8_t *ptr;
+	bool inuse:1; /* used by an object version */
+	bool filled:1; /* contains data */
+	bool dirty:1; /* must be written back */
+};
+
 /* internal backend management */
 extern struct backend *backend_lookup(const char *name);
 
@@ -116,6 +134,21 @@
 	return err;
 }
 
+/*
+ * page-based cache
+ */
+extern int page_cache_init(size_t max_size);
+extern void page_cache_free(void);
+extern void page_cache_init_objver(struct objver *ver);
+extern void page_cache_deinit_objver(struct objver *ver);
+
+extern struct page *page_lookup(struct objver *ver, uint64_t pgno, int flags);
+extern void page_inval_range(struct objver *ver, uint64_t pgno, size_t pgcnt);
+
+/* for now, we rely on the obj lock */
+#define page_lock(page)		do { } while (0)
+#define page_unlock(page)	do { } while (0)
+
 static inline void reset_objver_txn_info(struct txn *txn, struct objver *ver)
 {
 	uint64_t cookie = txn ? txn->id : UINT64_MAX;