You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
8976 lines
284 KiB
8976 lines
284 KiB
From 379b9f7247a4daac9545e3dec79d3c2660111d8d Mon Sep 17 00:00:00 2001 |
|
From: Hari Gowtham <hgowtham@redhat.com> |
|
Date: Mon, 8 Apr 2019 11:32:09 +0530 |
|
Subject: [PATCH 085/124] Revert "all: remove code which is not being |
|
considered in build" |
|
|
|
This reverts most part of commit 8293d21280fd6ddfc9bb54068cf87794fc6be207. |
|
It adds in the changes for tier and CTR with the necessary changes for building it. |
|
|
|
Label: DOWNSTREAM ONLY |
|
|
|
Change-Id: I8f7978618f2a6a949b09dbcfd25722494cb8f1cd |
|
Signed-off-by: Hari Gowtham <hgowtham@redhat.com> |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/166245 |
|
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com> |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> |
|
--- |
|
Makefile.am | 8 +- |
|
configure.ac | 34 + |
|
glusterfs.spec.in | 19 + |
|
libglusterfs/Makefile.am | 4 +- |
|
libglusterfs/src/glusterfs/mem-types.h | 1 + |
|
xlators/cluster/dht/src/Makefile.am | 14 +- |
|
xlators/cluster/dht/src/dht-rebalance.c | 12 + |
|
xlators/cluster/dht/src/tier-common.c | 1199 ++++++++ |
|
xlators/cluster/dht/src/tier-common.h | 55 + |
|
xlators/cluster/dht/src/tier.c | 3105 ++++++++++++++++++++ |
|
xlators/cluster/dht/src/tier.h | 110 + |
|
xlators/features/Makefile.am | 2 +- |
|
xlators/features/changetimerecorder/Makefile.am | 3 + |
|
.../features/changetimerecorder/src/Makefile.am | 26 + |
|
.../changetimerecorder/src/changetimerecorder.c | 2371 +++++++++++++++ |
|
.../changetimerecorder/src/changetimerecorder.h | 21 + |
|
.../features/changetimerecorder/src/ctr-helper.c | 293 ++ |
|
.../features/changetimerecorder/src/ctr-helper.h | 854 ++++++ |
|
.../features/changetimerecorder/src/ctr-messages.h | 61 + |
|
.../changetimerecorder/src/ctr-xlator-ctx.c | 362 +++ |
|
.../changetimerecorder/src/ctr-xlator-ctx.h | 68 + |
|
.../changetimerecorder/src/ctr_mem_types.h | 22 + |
|
22 files changed, 8637 insertions(+), 7 deletions(-) |
|
create mode 100644 xlators/cluster/dht/src/tier-common.c |
|
create mode 100644 xlators/cluster/dht/src/tier-common.h |
|
create mode 100644 xlators/cluster/dht/src/tier.c |
|
create mode 100644 xlators/cluster/dht/src/tier.h |
|
create mode 100644 xlators/features/changetimerecorder/Makefile.am |
|
create mode 100644 xlators/features/changetimerecorder/src/Makefile.am |
|
create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.c |
|
create mode 100644 xlators/features/changetimerecorder/src/changetimerecorder.h |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.c |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-helper.h |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-messages.h |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.c |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr-xlator-ctx.h |
|
create mode 100644 xlators/features/changetimerecorder/src/ctr_mem_types.h |
|
|
|
diff --git a/Makefile.am b/Makefile.am |
|
index e0c795f..613382f 100644 |
|
--- a/Makefile.am |
|
+++ b/Makefile.am |
|
@@ -3,7 +3,7 @@ SOURCES = site.h |
|
EXTRA_DIST = autogen.sh \ |
|
COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \ |
|
INSTALL README.md AUTHORS THANKS NEWS \ |
|
- glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \ |
|
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in libgfdb.pc.in \ |
|
run-tests.sh \ |
|
build-aux/pkg-version \ |
|
contrib/umountd \ |
|
@@ -15,8 +15,12 @@ SUBDIRS = $(ARGP_STANDALONE_DIR) rpc/xdr/gen libglusterfs rpc api xlators \ |
|
|
|
pkgconfigdir = @pkgconfigdir@ |
|
pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc |
|
+if USE_GFDB |
|
+pkgconfig_DATA += libgfdb.pc |
|
+endif |
|
|
|
-CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile |
|
+CLEANFILES = glusterfs-api.pc libgfchangelog.pc libgfdb.pc \ |
|
+ contrib/umountd/Makefile |
|
|
|
gitclean: distclean |
|
find . -name Makefile.in -exec rm -f {} \; |
|
diff --git a/configure.ac b/configure.ac |
|
index baa811a..633e850 100644 |
|
--- a/configure.ac |
|
+++ b/configure.ac |
|
@@ -30,6 +30,7 @@ AC_CONFIG_HEADERS([config.h site.h]) |
|
AC_CONFIG_FILES([Makefile |
|
libglusterfs/Makefile |
|
libglusterfs/src/Makefile |
|
+ libglusterfs/src/gfdb/Makefile |
|
geo-replication/src/peer_gsec_create |
|
geo-replication/src/peer_mountbroker |
|
geo-replication/src/peer_mountbroker.py |
|
@@ -121,6 +122,8 @@ AC_CONFIG_FILES([Makefile |
|
xlators/features/changelog/src/Makefile |
|
xlators/features/changelog/lib/Makefile |
|
xlators/features/changelog/lib/src/Makefile |
|
+ xlators/features/changetimerecorder/Makefile |
|
+ xlators/features/changetimerecorder/src/Makefile |
|
xlators/features/locks/Makefile |
|
xlators/features/locks/src/Makefile |
|
xlators/features/quota/Makefile |
|
@@ -237,6 +240,7 @@ AC_CONFIG_FILES([Makefile |
|
contrib/umountd/Makefile |
|
glusterfs-api.pc |
|
libgfchangelog.pc |
|
+ libgfdb.pc |
|
api/Makefile |
|
api/src/Makefile |
|
api/examples/Makefile |
|
@@ -866,6 +870,33 @@ AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"]) |
|
|
|
#endof firewald section |
|
|
|
+# Data tiering requires sqlite |
|
+AC_ARG_ENABLE([tiering], |
|
+ AC_HELP_STRING([--disable-tiering], |
|
+ [Disable data classification/tiering]), |
|
+ [BUILD_GFDB="${enableval}"], [BUILD_GFDB="yes"]) |
|
+ |
|
+case $host_os in |
|
+ darwin*) |
|
+ SQLITE_LIBS="-lsqlite3" |
|
+ AC_CHECK_HEADERS([sqlite3.h], AC_DEFINE(USE_GFDB, 1)) |
|
+ ;; |
|
+ *) |
|
+ if test "x${BUILD_GFDB}" = "xyes"; then |
|
+ PKG_CHECK_MODULES([SQLITE], [sqlite3], |
|
+ AC_DEFINE(USE_GFDB, 1), |
|
+ AC_MSG_ERROR([pass --disable-tiering to build without sqlite])) |
|
+ else |
|
+ AC_DEFINE(USE_GFDB, 0, [no sqlite, gfdb is disabled]) |
|
+ fi |
|
+ ;; |
|
+esac |
|
+ |
|
+AC_SUBST(SQLITE_CFLAGS) |
|
+AC_SUBST(SQLITE_LIBS) |
|
+AM_CONDITIONAL(BUILD_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") |
|
+AM_CONDITIONAL(USE_GFDB, test "x${with_server}" = "xyes" -a "x${BUILD_GFDB}" = "xyes") |
|
+ |
|
# xml-output |
|
AC_ARG_ENABLE([xml-output], |
|
AC_HELP_STRING([--disable-xml-output], |
|
@@ -1544,6 +1575,8 @@ GFAPI_VERSION="7."${PACKAGE_VERSION} |
|
LIBGFCHANGELOG_VERSION="0.0.1" |
|
AC_SUBST(GFAPI_VERSION) |
|
AC_SUBST(LIBGFCHANGELOG_VERSION) |
|
+LIBGFDB_VERSION="0.0.1" |
|
+AC_SUBST(LIBGFDB_VERSION) |
|
|
|
dnl libtool versioning |
|
LIBGFXDR_LT_VERSION="0:1:0" |
|
@@ -1584,6 +1617,7 @@ echo "XML output : $BUILD_XML_OUTPUT" |
|
echo "Unit Tests : $BUILD_UNITTEST" |
|
echo "Track priv ports : $TRACK_PRIVPORTS" |
|
echo "POSIX ACLs : $BUILD_POSIX_ACLS" |
|
+echo "Data Classification : $BUILD_GFDB" |
|
echo "firewalld-config : $BUILD_FIREWALLD" |
|
echo "Events : $BUILD_EVENTS" |
|
echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" |
|
diff --git a/glusterfs.spec.in b/glusterfs.spec.in |
|
index 2149f86..e0607ba 100644 |
|
--- a/glusterfs.spec.in |
|
+++ b/glusterfs.spec.in |
|
@@ -154,6 +154,7 @@ |
|
%global _without_events --disable-events |
|
%global _without_georeplication --disable-georeplication |
|
%global _with_gnfs %{nil} |
|
+%global _without_tiering --disable-tiering |
|
%global _without_ocf --without-ocf |
|
%endif |
|
|
|
@@ -287,6 +288,9 @@ BuildRequires: libuuid-devel |
|
%if ( 0%{?_with_cmocka:1} ) |
|
BuildRequires: libcmocka-devel >= 1.0.1 |
|
%endif |
|
+%if ( 0%{!?_without_tiering:1} ) |
|
+BuildRequires: sqlite-devel |
|
+%endif |
|
%if ( 0%{!?_without_georeplication:1} ) |
|
BuildRequires: libattr-devel |
|
%endif |
|
@@ -797,6 +801,7 @@ export LDFLAGS |
|
%{?_without_rdma} \ |
|
%{?_without_server} \ |
|
%{?_without_syslog} \ |
|
+ %{?_without_tiering} \ |
|
%{?_with_ipv6default} \ |
|
%{?_without_libtirpc} |
|
|
|
@@ -1232,9 +1237,15 @@ exit 0 |
|
%if ( 0%{?_without_server:1} ) |
|
%exclude %{_libdir}/pkgconfig/libgfchangelog.pc |
|
%exclude %{_libdir}/libgfchangelog.so |
|
+%if ( 0%{!?_without_tiering:1} ) |
|
+%{_libdir}/pkgconfig/libgfdb.pc |
|
+%endif |
|
%else |
|
%{_libdir}/pkgconfig/libgfchangelog.pc |
|
%{_libdir}/libgfchangelog.so |
|
+%if ( 0%{!?_without_tiering:1} ) |
|
+%{_libdir}/pkgconfig/libgfdb.pc |
|
+%endif |
|
%endif |
|
|
|
%files client-xlators |
|
@@ -1330,6 +1341,10 @@ exit 0 |
|
%files libs |
|
%{_libdir}/*.so.* |
|
%exclude %{_libdir}/libgfapi.* |
|
+%if ( 0%{!?_without_tiering:1} ) |
|
+# libgfdb is only needed server-side |
|
+%exclude %{_libdir}/libgfdb.* |
|
+%endif |
|
|
|
%files -n python%{_pythonver}-gluster |
|
# introducing glusterfs module in site packages. |
|
@@ -1417,6 +1432,10 @@ exit 0 |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so |
|
+%if ( 0%{!?_without_tiering:1} ) |
|
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so |
|
+ %{_libdir}/libgfdb.so.* |
|
+%endif |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so |
|
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix* |
|
diff --git a/libglusterfs/Makefile.am b/libglusterfs/Makefile.am |
|
index d471a3f..7e72f61 100644 |
|
--- a/libglusterfs/Makefile.am |
|
+++ b/libglusterfs/Makefile.am |
|
@@ -1,3 +1,3 @@ |
|
-SUBDIRS = src |
|
+SUBDIRS = src src/gfdb |
|
|
|
-CLEANFILES = |
|
+CLEANFILES = |
|
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h |
|
index 832f68c..92730a9 100644 |
|
--- a/libglusterfs/src/glusterfs/mem-types.h |
|
+++ b/libglusterfs/src/glusterfs/mem-types.h |
|
@@ -138,6 +138,7 @@ enum gf_common_mem_types_ { |
|
gf_common_volfile_t, |
|
gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */ |
|
gf_common_mt_server_cmdline_t, /* used only in one location */ |
|
+ gf_mt_gfdb_query_record_t, |
|
gf_common_mt_end |
|
}; |
|
#endif |
|
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am |
|
index 56f1f2a..5532047 100644 |
|
--- a/xlators/cluster/dht/src/Makefile.am |
|
+++ b/xlators/cluster/dht/src/Makefile.am |
|
@@ -1,4 +1,7 @@ |
|
xlator_LTLIBRARIES = dht.la nufa.la switch.la |
|
+if BUILD_GFDB |
|
+ xlator_LTLIBRARIES += tier.la |
|
+endif |
|
|
|
AM_CFLAGS = -Wall $(GF_CFLAGS) |
|
|
|
@@ -13,6 +16,7 @@ dht_la_SOURCES = $(dht_common_source) dht.c |
|
|
|
nufa_la_SOURCES = $(dht_common_source) nufa.c |
|
switch_la_SOURCES = $(dht_common_source) switch.c |
|
+tier_la_SOURCES = $(dht_common_source) tier.c tier-common.c |
|
|
|
dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) |
|
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la |
|
@@ -23,15 +27,21 @@ nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la |
|
switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) |
|
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la |
|
|
|
+tier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(LIB_DL) |
|
+tier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la |
|
+ |
|
noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \ |
|
- dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h |
|
+ dht-lock.h tier-common.h tier.h \ |
|
+ $(top_builddir)/xlators/lib/src/libxlator.h |
|
|
|
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ |
|
+ -I$(top_srcdir)/libglusterfs/src/gfdb \ |
|
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ |
|
-I$(top_srcdir)/rpc/rpc-lib/src \ |
|
-I$(top_srcdir)/xlators/lib/src \ |
|
-DDATADIR=\"$(localstatedir)\" \ |
|
- -DLIBDIR=\"$(libdir)\" |
|
+ -DLIBDIR=\"$(libdir)\" \ |
|
+ -DLIBGFDB_VERSION=\"$(LIBGFDB_VERSION)\" |
|
|
|
CLEANFILES = |
|
|
|
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c |
|
index e0f25b1..efbe8a4 100644 |
|
--- a/xlators/cluster/dht/src/dht-rebalance.c |
|
+++ b/xlators/cluster/dht/src/dht-rebalance.c |
|
@@ -8,6 +8,7 @@ |
|
cases as published by the Free Software Foundation. |
|
*/ |
|
|
|
+#include "tier.h" |
|
#include "dht-common.h" |
|
#include <glusterfs/xlator.h> |
|
#include <glusterfs/syscall.h> |
|
@@ -2134,6 +2135,17 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, |
|
} |
|
} |
|
|
|
+ /* store size of previous migrated file */ |
|
+ if (defrag && defrag->tier_conf.is_tier) { |
|
+ if (from != TIER_HASHED_SUBVOL) { |
|
+ defrag->tier_conf.st_last_promoted_size = stbuf.ia_size; |
|
+ } else { |
|
+ /* Don't delete the linkto file on the hashed subvol */ |
|
+ delete_src_linkto = _gf_false; |
|
+ defrag->tier_conf.st_last_demoted_size = stbuf.ia_size; |
|
+ } |
|
+ } |
|
+ |
|
/* The src file is being unlinked after this so we don't need |
|
to clean it up */ |
|
clean_src = _gf_false; |
|
diff --git a/xlators/cluster/dht/src/tier-common.c b/xlators/cluster/dht/src/tier-common.c |
|
new file mode 100644 |
|
index 0000000..b22f477 |
|
--- /dev/null |
|
+++ b/xlators/cluster/dht/src/tier-common.c |
|
@@ -0,0 +1,1199 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#include <glusterfs/glusterfs.h> |
|
+#include <glusterfs/xlator.h> |
|
+#include "libxlator.h" |
|
+#include "dht-common.h" |
|
+#include <glusterfs/defaults.h> |
|
+#include "tier-common.h" |
|
+#include "tier.h" |
|
+ |
|
+int |
|
+dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); |
|
+ |
|
+int |
|
+tier_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ loc_t *oldloc = NULL; |
|
+ loc_t *newloc = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ |
|
+ oldloc = &local->loc; |
|
+ newloc = &local->loc2; |
|
+ |
|
+ if (op_ret == -1) { |
|
+ /* No continuation on DHT inode missing errors, as we should |
|
+ * then have a good stbuf that states P2 happened. We would |
|
+ * get inode missing if, the file completed migrated between |
|
+ * the lookup and the link call */ |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (local->call_cnt != 1) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ local->call_cnt = 2; |
|
+ |
|
+ /* Do this on the hot tier now */ |
|
+ |
|
+ STACK_WIND(frame, tier_link_cbk, local->cached_subvol, |
|
+ local->cached_subvol->fops->link, oldloc, newloc, xdata); |
|
+ |
|
+ return 0; |
|
+ |
|
+out: |
|
+ DHT_STRIP_PHASE1_FLAGS(stbuf); |
|
+ |
|
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent, |
|
+ postparent, NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, |
|
+ dict_t *xdata) |
|
+{ |
|
+ xlator_t *cached_subvol = NULL; |
|
+ xlator_t *hashed_subvol = NULL; |
|
+ int op_errno = -1; |
|
+ int ret = -1; |
|
+ dht_local_t *local = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ VALIDATE_OR_GOTO(frame, err); |
|
+ VALIDATE_OR_GOTO(this, err); |
|
+ VALIDATE_OR_GOTO(oldloc, err); |
|
+ VALIDATE_OR_GOTO(newloc, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK); |
|
+ if (!local) { |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ local->call_cnt = 1; |
|
+ |
|
+ cached_subvol = local->cached_subvol; |
|
+ |
|
+ if (!cached_subvol) { |
|
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", |
|
+ oldloc->path); |
|
+ op_errno = ENOENT; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ hashed_subvol = TIER_HASHED_SUBVOL; |
|
+ |
|
+ ret = loc_copy(&local->loc2, newloc); |
|
+ if (ret == -1) { |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ if (hashed_subvol == cached_subvol) { |
|
+ STACK_WIND(frame, dht_link_cbk, cached_subvol, |
|
+ cached_subvol->fops->link, oldloc, newloc, xdata); |
|
+ return 0; |
|
+ } |
|
+ |
|
+ /* Create hardlinks to both the data file on the hot tier |
|
+ and the linkto file on the cold tier */ |
|
+ |
|
+ gf_uuid_copy(local->gfid, oldloc->inode->gfid); |
|
+ |
|
+ STACK_WIND(frame, tier_link_cbk, hashed_subvol, hashed_subvol->fops->link, |
|
+ oldloc, newloc, xdata); |
|
+ |
|
+ return 0; |
|
+err: |
|
+ op_errno = (op_errno == -1) ? errno : op_errno; |
|
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, |
|
+ xlator_t *this, int op_ret, int op_errno, |
|
+ struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ |
|
+ if (local->params) { |
|
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); |
|
+ } |
|
+ |
|
+ DHT_STACK_UNWIND(create, frame, -1, local->op_errno, NULL, NULL, NULL, NULL, |
|
+ NULL, NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ xlator_t *prev = NULL; |
|
+ int ret = -1; |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *hashed_subvol = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ conf = this->private; |
|
+ |
|
+ hashed_subvol = TIER_HASHED_SUBVOL; |
|
+ |
|
+ if (!local) { |
|
+ op_ret = -1; |
|
+ op_errno = EINVAL; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (op_ret == -1) { |
|
+ if (local->linked == _gf_true && local->xattr_req) { |
|
+ local->op_errno = op_errno; |
|
+ local->op_ret = op_ret; |
|
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file( |
|
+ local->xattr_req); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, |
|
+ "Failed to set dictionary value to " |
|
+ "unlink of migrating file"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ STACK_WIND(frame, tier_create_unlink_stale_linkto_cbk, |
|
+ hashed_subvol, hashed_subvol->fops->unlink, &local->loc, |
|
+ 0, local->xattr_req); |
|
+ return 0; |
|
+ } |
|
+ goto out; |
|
+ } |
|
+ |
|
+ prev = cookie; |
|
+ |
|
+ if (local->loc.parent) { |
|
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0); |
|
+ |
|
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1); |
|
+ } |
|
+ |
|
+ ret = dht_layout_preset(this, prev, inode); |
|
+ if (ret != 0) { |
|
+ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s", |
|
+ prev->name); |
|
+ op_ret = -1; |
|
+ op_errno = EINVAL; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ local->op_errno = op_errno; |
|
+ |
|
+ if (local->linked == _gf_true) { |
|
+ local->stbuf = *stbuf; |
|
+ dht_linkfile_attr_heal(frame, this); |
|
+ } |
|
+out: |
|
+ if (local) { |
|
+ if (local->xattr_req) { |
|
+ dict_del(local->xattr_req, TIER_LINKFILE_GFID); |
|
+ } |
|
+ } |
|
+ |
|
+ DHT_STRIP_PHASE1_FLAGS(stbuf); |
|
+ |
|
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, |
|
+ preparent, postparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, |
|
+ xlator_t *this, int32_t op_ret, |
|
+ int32_t op_errno, inode_t *inode, |
|
+ struct iatt *stbuf, struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *cached_subvol = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int ret = -1; |
|
+ unsigned char *gfid = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ if (!local) { |
|
+ op_errno = EINVAL; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ if (op_ret == -1) { |
|
+ local->op_errno = op_errno; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ conf = this->private; |
|
+ if (!conf) { |
|
+ local->op_errno = EINVAL; |
|
+ op_errno = EINVAL; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ cached_subvol = TIER_UNHASHED_SUBVOL; |
|
+ |
|
+ if (local->params) { |
|
+ dict_del(local->params, conf->link_xattr_name); |
|
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY); |
|
+ } |
|
+ |
|
+ /* |
|
+ * We will delete the linkfile if data file creation fails. |
|
+ * When deleting this stale linkfile, there is a possibility |
|
+ * for a race between this linkfile deletion and a stale |
|
+ * linkfile deletion triggered by another lookup from different |
|
+ * client. |
|
+ * |
|
+ * For eg: |
|
+ * |
|
+ * Client 1 Client 2 |
|
+ * |
|
+ * 1 linkfile created for foo |
|
+ * |
|
+ * 2 data file creation failed |
|
+ * |
|
+ * 3 creating a file with same name |
|
+ * |
|
+ * 4 lookup before creation deleted |
|
+ * the linkfile created by client1 |
|
+ * considering as a stale linkfile. |
|
+ * |
|
+ * 5 New linkfile created for foo |
|
+ * with different gfid. |
|
+ * |
|
+ * 6 Trigger linkfile deletion as |
|
+ * data file creation failed. |
|
+ * |
|
+ * 7 Linkfile deleted which is |
|
+ * created by client2. |
|
+ * |
|
+ * 8 Data file created. |
|
+ * |
|
+ * With this race, we will end up having a file in a non-hashed subvol |
|
+ * without a linkfile in hashed subvol. |
|
+ * |
|
+ * To avoid this, we store the gfid of linkfile created by client, So |
|
+ * If we delete the linkfile , we validate gfid of existing file with |
|
+ * stored value from posix layer. |
|
+ * |
|
+ * Storing this value in local->xattr_req as local->params was also used |
|
+ * to create the data file. During the linkfile deletion we will use |
|
+ * local->xattr_req dictionary. |
|
+ */ |
|
+ if (!local->xattr_req) { |
|
+ local->xattr_req = dict_new(); |
|
+ if (!local->xattr_req) { |
|
+ local->op_errno = ENOMEM; |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ } |
|
+ |
|
+ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); |
|
+ if (!gfid) { |
|
+ local->op_errno = ENOMEM; |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ gf_uuid_copy(gfid, stbuf->ia_gfid); |
|
+ ret = dict_set_dynptr(local->xattr_req, TIER_LINKFILE_GFID, gfid, |
|
+ sizeof(uuid_t)); |
|
+ if (ret) { |
|
+ GF_FREE(gfid); |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, |
|
+ "Failed to set dictionary value" |
|
+ " : key = %s", |
|
+ TIER_LINKFILE_GFID); |
|
+ } |
|
+ |
|
+ STACK_WIND_COOKIE(frame, tier_create_cbk, cached_subvol, cached_subvol, |
|
+ cached_subvol->fops->create, &local->loc, local->flags, |
|
+ local->mode, local->umask, local->fd, local->params); |
|
+ |
|
+ return 0; |
|
+err: |
|
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, |
|
+ NULL); |
|
+ return 0; |
|
+} |
|
+ |
|
+gf_boolean_t |
|
+tier_is_hot_tier_decommissioned(xlator_t *this) |
|
+{ |
|
+ dht_conf_t *conf = NULL; |
|
+ xlator_t *hot_tier = NULL; |
|
+ int i = 0; |
|
+ |
|
+ conf = this->private; |
|
+ hot_tier = conf->subvolumes[1]; |
|
+ |
|
+ if (conf->decommission_subvols_cnt) { |
|
+ for (i = 0; i < conf->subvolume_cnt; i++) { |
|
+ if (conf->decommissioned_bricks[i] && |
|
+ conf->decommissioned_bricks[i] == hot_tier) |
|
+ return _gf_true; |
|
+ } |
|
+ } |
|
+ |
|
+ return _gf_false; |
|
+} |
|
+ |
|
+int |
|
+tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, |
|
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params) |
|
+{ |
|
+ int op_errno = -1; |
|
+ dht_local_t *local = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ xlator_t *hot_subvol = NULL; |
|
+ xlator_t *cold_subvol = NULL; |
|
+ |
|
+ VALIDATE_OR_GOTO(frame, err); |
|
+ VALIDATE_OR_GOTO(this, err); |
|
+ VALIDATE_OR_GOTO(loc, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ dht_get_du_info(frame, this, loc); |
|
+ |
|
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE); |
|
+ if (!local) { |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ cold_subvol = TIER_HASHED_SUBVOL; |
|
+ hot_subvol = TIER_UNHASHED_SUBVOL; |
|
+ |
|
+ if (conf->subvolumes[0] != cold_subvol) { |
|
+ hot_subvol = conf->subvolumes[0]; |
|
+ } |
|
+ /* |
|
+ * if hot tier full, write to cold. |
|
+ * Also if hot tier is full, create in cold |
|
+ */ |
|
+ if (dht_is_subvol_filled(this, hot_subvol) || |
|
+ tier_is_hot_tier_decommissioned(this)) { |
|
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path, |
|
+ cold_subvol->name); |
|
+ |
|
+ STACK_WIND_COOKIE(frame, tier_create_cbk, cold_subvol, cold_subvol, |
|
+ cold_subvol->fops->create, loc, flags, mode, umask, |
|
+ fd, params); |
|
+ } else { |
|
+ local->params = dict_ref(params); |
|
+ local->flags = flags; |
|
+ local->mode = mode; |
|
+ local->umask = umask; |
|
+ local->cached_subvol = hot_subvol; |
|
+ local->hashed_subvol = cold_subvol; |
|
+ |
|
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)", loc->path, |
|
+ hot_subvol->name, cold_subvol->name); |
|
+ |
|
+ dht_linkfile_create(frame, tier_create_linkfile_create_cbk, this, |
|
+ hot_subvol, cold_subvol, loc); |
|
+ |
|
+ goto out; |
|
+ } |
|
+out: |
|
+ return 0; |
|
+ |
|
+err: |
|
+ |
|
+ op_errno = (op_errno == -1) ? errno : op_errno; |
|
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL, |
|
+ NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_unlink_nonhashed_linkfile_cbk(call_frame_t *frame, void *cookie, |
|
+ xlator_t *this, int op_ret, int op_errno, |
|
+ struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *prev = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ prev = cookie; |
|
+ |
|
+ LOCK(&frame->lock); |
|
+ { |
|
+ if ((op_ret == -1) && (op_errno != ENOENT)) { |
|
+ local->op_errno = op_errno; |
|
+ local->op_ret = op_ret; |
|
+ gf_msg_debug(this->name, op_errno, |
|
+ "Unlink link: subvolume %s" |
|
+ " returned -1", |
|
+ prev->name); |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ local->op_ret = 0; |
|
+ } |
|
+unlock: |
|
+ UNLOCK(&frame->lock); |
|
+ |
|
+ if (local->op_ret == -1) |
|
+ goto err; |
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, |
|
+ &local->preparent, &local->postparent, NULL); |
|
+ |
|
+ return 0; |
|
+ |
|
+err: |
|
+ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int op_ret, int op_errno, inode_t *inode, |
|
+ struct iatt *preparent, dict_t *xdata, |
|
+ struct iatt *postparent) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *prev = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ xlator_t *hot_subvol = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ prev = cookie; |
|
+ conf = this->private; |
|
+ hot_subvol = TIER_UNHASHED_SUBVOL; |
|
+ |
|
+ if (!op_ret) { |
|
+ /* |
|
+ * linkfile present on hot tier. unlinking the linkfile |
|
+ */ |
|
+ STACK_WIND_COOKIE(frame, tier_unlink_nonhashed_linkfile_cbk, hot_subvol, |
|
+ hot_subvol, hot_subvol->fops->unlink, &local->loc, |
|
+ local->flags, NULL); |
|
+ return 0; |
|
+ } |
|
+ |
|
+ LOCK(&frame->lock); |
|
+ { |
|
+ if (op_errno == ENOENT) { |
|
+ local->op_ret = 0; |
|
+ local->op_errno = op_errno; |
|
+ } else { |
|
+ local->op_ret = op_ret; |
|
+ local->op_errno = op_errno; |
|
+ } |
|
+ gf_msg_debug(this->name, op_errno, "Lookup : subvolume %s returned -1", |
|
+ prev->name); |
|
+ } |
|
+ |
|
+ UNLOCK(&frame->lock); |
|
+ |
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, |
|
+ &local->preparent, &local->postparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int op_ret, int op_errno, struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *prev = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ prev = cookie; |
|
+ |
|
+ LOCK(&frame->lock); |
|
+ { |
|
+ /* Ignore EINVAL for tier to ignore error when the file |
|
+ does not exist on the other tier */ |
|
+ if ((op_ret == -1) && !((op_errno == ENOENT) || (op_errno == EINVAL))) { |
|
+ local->op_errno = op_errno; |
|
+ local->op_ret = op_ret; |
|
+ gf_msg_debug(this->name, op_errno, |
|
+ "Unlink link: subvolume %s" |
|
+ " returned -1", |
|
+ prev->name); |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ local->op_ret = 0; |
|
+ } |
|
+unlock: |
|
+ UNLOCK(&frame->lock); |
|
+ |
|
+ if (local->op_ret == -1) |
|
+ goto err; |
|
+ |
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, |
|
+ &local->preparent, &local->postparent, xdata); |
|
+ |
|
+ return 0; |
|
+ |
|
+err: |
|
+ DHT_STACK_UNWIND(unlink, frame, -1, local->op_errno, NULL, NULL, NULL); |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+tier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, struct iatt *preparent, struct iatt *postparent, |
|
+ dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ xlator_t *prev = NULL; |
|
+ struct iatt *stbuf = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int ret = -1; |
|
+ xlator_t *hot_tier = NULL; |
|
+ xlator_t *cold_tier = NULL; |
|
+ |
|
+ local = frame->local; |
|
+ prev = cookie; |
|
+ conf = this->private; |
|
+ |
|
+ cold_tier = TIER_HASHED_SUBVOL; |
|
+ hot_tier = TIER_UNHASHED_SUBVOL; |
|
+ |
|
+ LOCK(&frame->lock); |
|
+ { |
|
+ if (op_ret == -1) { |
|
+ if (op_errno == ENOENT) { |
|
+ local->op_ret = 0; |
|
+ } else { |
|
+ local->op_ret = -1; |
|
+ local->op_errno = op_errno; |
|
+ } |
|
+ gf_msg_debug(this->name, op_errno, |
|
+ "Unlink: subvolume %s returned -1" |
|
+ " with errno = %d", |
|
+ prev->name, op_errno); |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ local->op_ret = 0; |
|
+ |
|
+ local->postparent = *postparent; |
|
+ local->preparent = *preparent; |
|
+ |
|
+ if (local->loc.parent) { |
|
+ dht_inode_ctx_time_update(local->loc.parent, this, |
|
+ &local->preparent, 0); |
|
+ dht_inode_ctx_time_update(local->loc.parent, this, |
|
+ &local->postparent, 1); |
|
+ } |
|
+ } |
|
+unlock: |
|
+ UNLOCK(&frame->lock); |
|
+ |
|
+ if (local->op_ret) |
|
+ goto out; |
|
+ |
|
+ if (cold_tier != local->cached_subvol) { |
|
+ /* |
|
+ * File is present in hot tier, so there will be |
|
+ * a link file on cold tier, deleting the linkfile |
|
+ * from cold tier |
|
+ */ |
|
+ STACK_WIND_COOKIE(frame, tier_unlink_linkfile_cbk, cold_tier, cold_tier, |
|
+ cold_tier->fops->unlink, &local->loc, local->flags, |
|
+ xdata); |
|
+ return 0; |
|
+ } |
|
+ |
|
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf); |
|
+ if (!ret && stbuf && |
|
+ ((IS_DHT_MIGRATION_PHASE2(stbuf)) || IS_DHT_MIGRATION_PHASE1(stbuf))) { |
|
+ /* |
|
+ * File is migrating from cold to hot tier. |
|
+ * Delete the destination linkfile. |
|
+ */ |
|
+ STACK_WIND_COOKIE(frame, tier_unlink_lookup_cbk, hot_tier, hot_tier, |
|
+ hot_tier->fops->lookup, &local->loc, NULL); |
|
+ return 0; |
|
+ } |
|
+ |
|
+out: |
|
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, |
|
+ &local->preparent, &local->postparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, |
|
+ dict_t *xdata) |
|
+{ |
|
+ xlator_t *cached_subvol = NULL; |
|
+ xlator_t *hashed_subvol = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int op_errno = -1; |
|
+ dht_local_t *local = NULL; |
|
+ int ret = -1; |
|
+ |
|
+ VALIDATE_OR_GOTO(frame, err); |
|
+ VALIDATE_OR_GOTO(this, err); |
|
+ VALIDATE_OR_GOTO(loc, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK); |
|
+ if (!local) { |
|
+ op_errno = ENOMEM; |
|
+ |
|
+ goto err; |
|
+ } |
|
+ |
|
+ hashed_subvol = TIER_HASHED_SUBVOL; |
|
+ |
|
+ cached_subvol = local->cached_subvol; |
|
+ if (!cached_subvol) { |
|
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s", |
|
+ loc->path); |
|
+ op_errno = EINVAL; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ local->flags = xflag; |
|
+ if (IA_ISREG(loc->inode->ia_type) && (hashed_subvol == cached_subvol)) { |
|
+ /* |
|
+ * File resides in cold tier. We need to stat |
|
+ * the file to see if it is being promoted. |
|
+ * If yes we need to delete the destination |
|
+ * file as well. |
|
+ * |
|
+ * Currently we are doing this check only for |
|
+ * regular files. |
|
+ */ |
|
+ xdata = xdata ? dict_ref(xdata) : dict_new(); |
|
+ if (xdata) { |
|
+ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1); |
|
+ if (ret) { |
|
+ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s", |
|
+ DHT_IATT_IN_XDATA_KEY); |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ /* |
|
+ * File is on hot tier, delete the data file first, then |
|
+ * linkfile from cold. |
|
+ */ |
|
+ STACK_WIND_COOKIE(frame, tier_unlink_cbk, cached_subvol, cached_subvol, |
|
+ cached_subvol->fops->unlink, loc, xflag, xdata); |
|
+ if (xdata) |
|
+ dict_unref(xdata); |
|
+ return 0; |
|
+err: |
|
+ op_errno = (op_errno == -1) ? errno : op_errno; |
|
+ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) |
|
+{ |
|
+ gf_dirent_t entries; |
|
+ gf_dirent_t *orig_entry = NULL; |
|
+ gf_dirent_t *entry = NULL; |
|
+ int count = 0; |
|
+ |
|
+ INIT_LIST_HEAD(&entries.list); |
|
+ |
|
+ if (op_ret < 0) |
|
+ goto unwind; |
|
+ |
|
+ list_for_each_entry(orig_entry, (&orig_entries->list), list) |
|
+ { |
|
+ entry = gf_dirent_for_name(orig_entry->d_name); |
|
+ if (!entry) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, |
|
+ "Memory allocation failed "); |
|
+ goto unwind; |
|
+ } |
|
+ |
|
+ entry->d_off = orig_entry->d_off; |
|
+ entry->d_ino = orig_entry->d_ino; |
|
+ entry->d_type = orig_entry->d_type; |
|
+ entry->d_len = orig_entry->d_len; |
|
+ |
|
+ list_add_tail(&entry->list, &entries.list); |
|
+ count++; |
|
+ } |
|
+ op_ret = count; |
|
+ |
|
+unwind: |
|
+ if (op_ret < 0) |
|
+ op_ret = 0; |
|
+ |
|
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL); |
|
+ |
|
+ gf_dirent_free(&entries); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ gf_dirent_t entries; |
|
+ gf_dirent_t *orig_entry = NULL; |
|
+ gf_dirent_t *entry = NULL; |
|
+ xlator_t *prev = NULL; |
|
+ xlator_t *next_subvol = NULL; |
|
+ off_t next_offset = 0; |
|
+ int count = 0; |
|
+ dht_conf_t *conf = NULL; |
|
+ int ret = 0; |
|
+ inode_table_t *itable = NULL; |
|
+ inode_t *inode = NULL; |
|
+ |
|
+ INIT_LIST_HEAD(&entries.list); |
|
+ prev = cookie; |
|
+ local = frame->local; |
|
+ itable = local->fd ? local->fd->inode->table : NULL; |
|
+ |
|
+ conf = this->private; |
|
+ GF_VALIDATE_OR_GOTO(this->name, conf, unwind); |
|
+ |
|
+ if (op_ret < 0) |
|
+ goto done; |
|
+ |
|
+ list_for_each_entry(orig_entry, (&orig_entries->list), list) |
|
+ { |
|
+ next_offset = orig_entry->d_off; |
|
+ |
|
+ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) { |
|
+ /*stat failed somewhere- ignore this entry*/ |
|
+ continue; |
|
+ } |
|
+ |
|
+ entry = gf_dirent_for_name(orig_entry->d_name); |
|
+ if (!entry) { |
|
+ goto unwind; |
|
+ } |
|
+ |
|
+ entry->d_off = orig_entry->d_off; |
|
+ entry->d_stat = orig_entry->d_stat; |
|
+ entry->d_ino = orig_entry->d_ino; |
|
+ entry->d_type = orig_entry->d_type; |
|
+ entry->d_len = orig_entry->d_len; |
|
+ |
|
+ if (orig_entry->dict) |
|
+ entry->dict = dict_ref(orig_entry->dict); |
|
+ |
|
+ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict, |
|
+ conf->link_xattr_name)) { |
|
+ goto entries; |
|
+ |
|
+ } else if (IA_ISDIR(entry->d_stat.ia_type)) { |
|
+ if (orig_entry->inode) { |
|
+ dht_inode_ctx_time_update(orig_entry->inode, this, |
|
+ &entry->d_stat, 1); |
|
+ } |
|
+ } else { |
|
+ if (orig_entry->inode) { |
|
+ ret = dht_layout_preset(this, prev, orig_entry->inode); |
|
+ if (ret) |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, |
|
+ DHT_MSG_LAYOUT_SET_FAILED, |
|
+ "failed to link the layout " |
|
+ "in inode"); |
|
+ |
|
+ entry->inode = inode_ref(orig_entry->inode); |
|
+ } else if (itable) { |
|
+ /* |
|
+ * orig_entry->inode might be null if any upper |
|
+ * layer xlators below client set to null, to |
|
+ * force a lookup on the inode even if the inode |
|
+ * is present in the inode table. In that case |
|
+ * we just update the ctx to make sure we didn't |
|
+ * missed anything. |
|
+ */ |
|
+ inode = inode_find(itable, orig_entry->d_stat.ia_gfid); |
|
+ if (inode) { |
|
+ ret = dht_layout_preset(this, TIER_HASHED_SUBVOL, inode); |
|
+ if (ret) |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, |
|
+ DHT_MSG_LAYOUT_SET_FAILED, |
|
+ "failed to link the layout" |
|
+ " in inode"); |
|
+ inode_unref(inode); |
|
+ inode = NULL; |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ entries: |
|
+ list_add_tail(&entry->list, &entries.list); |
|
+ count++; |
|
+ } |
|
+ op_ret = count; |
|
+ |
|
+done: |
|
+ if (count == 0) { |
|
+ /* non-zero next_offset means that |
|
+ EOF is not yet hit on the current subvol |
|
+ */ |
|
+ if (next_offset != 0) { |
|
+ next_subvol = prev; |
|
+ } else { |
|
+ goto unwind; |
|
+ } |
|
+ |
|
+ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, next_subvol, next_subvol, |
|
+ next_subvol->fops->readdirp, local->fd, local->size, |
|
+ next_offset, local->xattr); |
|
+ return 0; |
|
+ } |
|
+ |
|
+unwind: |
|
+ if (op_ret < 0) |
|
+ op_ret = 0; |
|
+ |
|
+ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL); |
|
+ |
|
+ gf_dirent_free(&entries); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, |
|
+ off_t yoff, int whichop, dict_t *dict) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ int op_errno = -1; |
|
+ xlator_t *hashed_subvol = NULL; |
|
+ int ret = 0; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ VALIDATE_OR_GOTO(frame, err); |
|
+ VALIDATE_OR_GOTO(this, err); |
|
+ VALIDATE_OR_GOTO(fd, err); |
|
+ VALIDATE_OR_GOTO(this->private, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ local = dht_local_init(frame, NULL, NULL, whichop); |
|
+ if (!local) { |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ local->fd = fd_ref(fd); |
|
+ local->size = size; |
|
+ local->xattr_req = (dict) ? dict_ref(dict) : NULL; |
|
+ |
|
+ hashed_subvol = TIER_HASHED_SUBVOL; |
|
+ |
|
+ /* TODO: do proper readdir */ |
|
+ if (whichop == GF_FOP_READDIRP) { |
|
+ if (dict) |
|
+ local->xattr = dict_ref(dict); |
|
+ else |
|
+ local->xattr = dict_new(); |
|
+ |
|
+ if (local->xattr) { |
|
+ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256); |
|
+ if (ret) |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED, |
|
+ "Failed to set dictionary value" |
|
+ " : key = %s", |
|
+ conf->link_xattr_name); |
|
+ } |
|
+ |
|
+ STACK_WIND_COOKIE(frame, tier_readdirp_cbk, hashed_subvol, |
|
+ hashed_subvol, hashed_subvol->fops->readdirp, fd, |
|
+ size, yoff, local->xattr); |
|
+ |
|
+ } else { |
|
+ STACK_WIND_COOKIE(frame, tier_readdir_cbk, hashed_subvol, hashed_subvol, |
|
+ hashed_subvol->fops->readdir, fd, size, yoff, |
|
+ local->xattr); |
|
+ } |
|
+ |
|
+ return 0; |
|
+ |
|
+err: |
|
+ op_errno = (op_errno == -1) ? errno : op_errno; |
|
+ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, |
|
+ off_t yoff, dict_t *xdata) |
|
+{ |
|
+ int op = GF_FOP_READDIR; |
|
+ dht_conf_t *conf = NULL; |
|
+ int i = 0; |
|
+ |
|
+ conf = this->private; |
|
+ if (!conf) |
|
+ goto out; |
|
+ |
|
+ for (i = 0; i < conf->subvolume_cnt; i++) { |
|
+ if (!conf->subvolume_status[i]) { |
|
+ op = GF_FOP_READDIRP; |
|
+ break; |
|
+ } |
|
+ } |
|
+ |
|
+ if (conf->use_readdirp) |
|
+ op = GF_FOP_READDIRP; |
|
+ |
|
+out: |
|
+ tier_do_readdir(frame, this, fd, size, yoff, op, 0); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, |
|
+ off_t yoff, dict_t *dict) |
|
+{ |
|
+ tier_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, struct statvfs *statvfs, dict_t *xdata) |
|
+{ |
|
+ gf_boolean_t event = _gf_false; |
|
+ qdstatfs_action_t action = qdstatfs_action_OFF; |
|
+ dht_local_t *local = NULL; |
|
+ int this_call_cnt = 0; |
|
+ int bsize = 0; |
|
+ int frsize = 0; |
|
+ GF_UNUSED int ret = 0; |
|
+ unsigned long new_usage = 0; |
|
+ unsigned long cur_usage = 0; |
|
+ xlator_t *prev = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ tier_statvfs_t *tier_stat = NULL; |
|
+ |
|
+ prev = cookie; |
|
+ local = frame->local; |
|
+ GF_ASSERT(local); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ if (xdata) |
|
+ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event); |
|
+ |
|
+ tier_stat = &local->tier_statvfs; |
|
+ |
|
+ LOCK(&frame->lock); |
|
+ { |
|
+ if (op_ret == -1) { |
|
+ local->op_errno = op_errno; |
|
+ goto unlock; |
|
+ } |
|
+ if (!statvfs) { |
|
+ op_errno = EINVAL; |
|
+ local->op_ret = -1; |
|
+ goto unlock; |
|
+ } |
|
+ local->op_ret = 0; |
|
+ |
|
+ if (local->quota_deem_statfs) { |
|
+ if (event == _gf_true) { |
|
+ action = qdstatfs_action_COMPARE; |
|
+ } else { |
|
+ action = qdstatfs_action_NEGLECT; |
|
+ } |
|
+ } else { |
|
+ if (event == _gf_true) { |
|
+ action = qdstatfs_action_REPLACE; |
|
+ local->quota_deem_statfs = _gf_true; |
|
+ } |
|
+ } |
|
+ |
|
+ if (local->quota_deem_statfs) { |
|
+ switch (action) { |
|
+ case qdstatfs_action_NEGLECT: |
|
+ goto unlock; |
|
+ |
|
+ case qdstatfs_action_REPLACE: |
|
+ local->statvfs = *statvfs; |
|
+ goto unlock; |
|
+ |
|
+ case qdstatfs_action_COMPARE: |
|
+ new_usage = statvfs->f_blocks - statvfs->f_bfree; |
|
+ cur_usage = local->statvfs.f_blocks - |
|
+ local->statvfs.f_bfree; |
|
+ |
|
+ /* Take the max of the usage from subvols */ |
|
+ if (new_usage >= cur_usage) |
|
+ local->statvfs = *statvfs; |
|
+ goto unlock; |
|
+ |
|
+ default: |
|
+ break; |
|
+ } |
|
+ } |
|
+ |
|
+ if (local->statvfs.f_bsize != 0) { |
|
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize); |
|
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize); |
|
+ dht_normalize_stats(&local->statvfs, bsize, frsize); |
|
+ dht_normalize_stats(statvfs, bsize, frsize); |
|
+ } else { |
|
+ local->statvfs.f_bsize = statvfs->f_bsize; |
|
+ local->statvfs.f_frsize = statvfs->f_frsize; |
|
+ } |
|
+ |
|
+ if (prev == TIER_HASHED_SUBVOL) { |
|
+ local->statvfs.f_blocks = statvfs->f_blocks; |
|
+ local->statvfs.f_files = statvfs->f_files; |
|
+ local->statvfs.f_fsid = statvfs->f_fsid; |
|
+ local->statvfs.f_flag = statvfs->f_flag; |
|
+ local->statvfs.f_namemax = statvfs->f_namemax; |
|
+ tier_stat->blocks_used = (statvfs->f_blocks - statvfs->f_bfree); |
|
+ tier_stat->pblocks_used = (statvfs->f_blocks - statvfs->f_bavail); |
|
+ tier_stat->files_used = (statvfs->f_files - statvfs->f_ffree); |
|
+ tier_stat->pfiles_used = (statvfs->f_files - statvfs->f_favail); |
|
+ tier_stat->hashed_fsid = statvfs->f_fsid; |
|
+ } else { |
|
+ tier_stat->unhashed_fsid = statvfs->f_fsid; |
|
+ tier_stat->unhashed_blocks_used = (statvfs->f_blocks - |
|
+ statvfs->f_bfree); |
|
+ tier_stat->unhashed_pblocks_used = (statvfs->f_blocks - |
|
+ statvfs->f_bavail); |
|
+ tier_stat->unhashed_files_used = (statvfs->f_files - |
|
+ statvfs->f_ffree); |
|
+ tier_stat->unhashed_pfiles_used = (statvfs->f_files - |
|
+ statvfs->f_favail); |
|
+ } |
|
+ } |
|
+unlock: |
|
+ UNLOCK(&frame->lock); |
|
+ |
|
+ this_call_cnt = dht_frame_return(frame); |
|
+ if (is_last_call(this_call_cnt)) { |
|
+ if (tier_stat->unhashed_fsid != tier_stat->hashed_fsid) { |
|
+ tier_stat->blocks_used += tier_stat->unhashed_blocks_used; |
|
+ tier_stat->pblocks_used += tier_stat->unhashed_pblocks_used; |
|
+ tier_stat->files_used += tier_stat->unhashed_files_used; |
|
+ tier_stat->pfiles_used += tier_stat->unhashed_pfiles_used; |
|
+ } |
|
+ local->statvfs.f_bfree = local->statvfs.f_blocks - |
|
+ tier_stat->blocks_used; |
|
+ local->statvfs.f_bavail = local->statvfs.f_blocks - |
|
+ tier_stat->pblocks_used; |
|
+ local->statvfs.f_ffree = local->statvfs.f_files - tier_stat->files_used; |
|
+ local->statvfs.f_favail = local->statvfs.f_files - |
|
+ tier_stat->pfiles_used; |
|
+ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno, |
|
+ &local->statvfs, xdata); |
|
+ } |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int op_errno = -1; |
|
+ int i = -1; |
|
+ inode_t *inode = NULL; |
|
+ inode_table_t *itable = NULL; |
|
+ uuid_t root_gfid = { |
|
+ 0, |
|
+ }; |
|
+ loc_t newloc = { |
|
+ 0, |
|
+ }; |
|
+ |
|
+ VALIDATE_OR_GOTO(frame, err); |
|
+ VALIDATE_OR_GOTO(this, err); |
|
+ VALIDATE_OR_GOTO(loc, err); |
|
+ VALIDATE_OR_GOTO(this->private, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS); |
|
+ if (!local) { |
|
+ op_errno = ENOMEM; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) { |
|
+ itable = loc->inode->table; |
|
+ if (!itable) { |
|
+ op_errno = EINVAL; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ loc = &local->loc2; |
|
+ root_gfid[15] = 1; |
|
+ |
|
+ inode = inode_find(itable, root_gfid); |
|
+ if (!inode) { |
|
+ op_errno = EINVAL; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ dht_build_root_loc(inode, &newloc); |
|
+ loc = &newloc; |
|
+ } |
|
+ |
|
+ local->call_cnt = conf->subvolume_cnt; |
|
+ |
|
+ for (i = 0; i < conf->subvolume_cnt; i++) { |
|
+ STACK_WIND_COOKIE(frame, tier_statfs_cbk, conf->subvolumes[i], |
|
+ conf->subvolumes[i], |
|
+ conf->subvolumes[i]->fops->statfs, loc, xdata); |
|
+ } |
|
+ |
|
+ return 0; |
|
+ |
|
+err: |
|
+ op_errno = (op_errno == -1) ? errno : op_errno; |
|
+ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL); |
|
+ |
|
+ return 0; |
|
+} |
|
diff --git a/xlators/cluster/dht/src/tier-common.h b/xlators/cluster/dht/src/tier-common.h |
|
new file mode 100644 |
|
index 0000000..b1ebaa8 |
|
--- /dev/null |
|
+++ b/xlators/cluster/dht/src/tier-common.h |
|
@@ -0,0 +1,55 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef _TIER_COMMON_H_ |
|
+#define _TIER_COMMON_H_ |
|
+/* Function definitions */ |
|
+int |
|
+tier_create_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie, |
|
+ xlator_t *this, int op_ret, int op_errno, |
|
+ struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata); |
|
+ |
|
+int |
|
+tier_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata); |
|
+ |
|
+int |
|
+tier_create_linkfile_create_cbk(call_frame_t *frame, void *cookie, |
|
+ xlator_t *this, int32_t op_ret, |
|
+ int32_t op_errno, inode_t *inode, |
|
+ struct iatt *stbuf, struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata); |
|
+ |
|
+int |
|
+tier_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, |
|
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params); |
|
+ |
|
+int32_t |
|
+tier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, |
|
+ dict_t *xdata); |
|
+ |
|
+int32_t |
|
+tier_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, |
|
+ off_t off, dict_t *dict); |
|
+ |
|
+int |
|
+tier_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, |
|
+ off_t yoff, dict_t *xdata); |
|
+ |
|
+int |
|
+tier_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, |
|
+ dict_t *xdata); |
|
+ |
|
+int |
|
+tier_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); |
|
+ |
|
+#endif |
|
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c |
|
new file mode 100644 |
|
index 0000000..94b4c63 |
|
--- /dev/null |
|
+++ b/xlators/cluster/dht/src/tier.c |
|
@@ -0,0 +1,3105 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#include <dlfcn.h> |
|
+ |
|
+#include "dht-common.h" |
|
+#include "tier.h" |
|
+#include "tier-common.h" |
|
+#include <glusterfs/syscall.h> |
|
+#include <glusterfs/events.h> |
|
+#include "tier-ctr-interface.h" |
|
+ |
|
+/*Hard coded DB info*/ |
|
+static gfdb_db_type_t dht_tier_db_type = GFDB_SQLITE3; |
|
+/*Hard coded DB info*/ |
|
+ |
|
+/*Mutex for updating the data movement stats*/ |
|
+static pthread_mutex_t dm_stat_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
+ |
|
+/* Stores the path location of promotion query files */ |
|
+static char *promotion_qfile; |
|
+/* Stores the path location of demotion query files */ |
|
+static char *demotion_qfile; |
|
+ |
|
+static void *libhandle; |
|
+static gfdb_methods_t gfdb_methods; |
|
+ |
|
+#define DB_QUERY_RECORD_SIZE 4096 |
|
+ |
|
+/* |
|
+ * Closes all the fds and frees the qfile_array |
|
+ * */ |
|
+static void |
|
+qfile_array_free(tier_qfile_array_t *qfile_array) |
|
+{ |
|
+ ssize_t i = 0; |
|
+ |
|
+ if (qfile_array) { |
|
+ if (qfile_array->fd_array) { |
|
+ for (i = 0; i < qfile_array->array_size; i++) { |
|
+ if (qfile_array->fd_array[i] != -1) { |
|
+ sys_close(qfile_array->fd_array[i]); |
|
+ } |
|
+ } |
|
+ } |
|
+ GF_FREE(qfile_array->fd_array); |
|
+ } |
|
+ GF_FREE(qfile_array); |
|
+} |
|
+ |
|
+/* Create a new query file list with given size */ |
|
+static tier_qfile_array_t * |
|
+qfile_array_new(ssize_t array_size) |
|
+{ |
|
+ int ret = -1; |
|
+ tier_qfile_array_t *qfile_array = NULL; |
|
+ ssize_t i = 0; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", (array_size > 0), out); |
|
+ |
|
+ qfile_array = GF_CALLOC(1, sizeof(tier_qfile_array_t), |
|
+ gf_tier_mt_qfile_array_t); |
|
+ if (!qfile_array) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to allocate memory for tier_qfile_array_t"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ qfile_array->fd_array = GF_MALLOC(array_size * sizeof(int), |
|
+ gf_dht_mt_int32_t); |
|
+ if (!qfile_array->fd_array) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to allocate memory for " |
|
+ "tier_qfile_array_t->fd_array"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Init all the fds to -1 */ |
|
+ for (i = 0; i < array_size; i++) { |
|
+ qfile_array->fd_array[i] = -1; |
|
+ } |
|
+ |
|
+ qfile_array->array_size = array_size; |
|
+ qfile_array->next_index = 0; |
|
+ |
|
+ /* Set exhausted count to list size as the list is empty */ |
|
+ qfile_array->exhausted_count = qfile_array->array_size; |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ if (ret) { |
|
+ qfile_array_free(qfile_array); |
|
+ qfile_array = NULL; |
|
+ } |
|
+ return qfile_array; |
|
+} |
|
+ |
|
+/* Checks if the query file list is empty or totally exhausted. */ |
|
+static gf_boolean_t |
|
+is_qfile_array_empty(tier_qfile_array_t *qfile_array) |
|
+{ |
|
+ return (qfile_array->exhausted_count == qfile_array->array_size) |
|
+ ? _gf_true |
|
+ : _gf_false; |
|
+} |
|
+ |
|
+/* Shifts the next_fd pointer to the next available fd in the list */ |
|
+static void |
|
+shift_next_index(tier_qfile_array_t *qfile_array) |
|
+{ |
|
+ int qfile_fd = 0; |
|
+ int spin_count = 0; |
|
+ |
|
+ if (is_qfile_array_empty(qfile_array)) { |
|
+ return; |
|
+ } |
|
+ |
|
+ do { |
|
+ /* change next_index in a rotional manner */ |
|
+ (qfile_array->next_index == (qfile_array->array_size - 1)) |
|
+ ? qfile_array->next_index = 0 |
|
+ : qfile_array->next_index++; |
|
+ |
|
+ qfile_fd = (qfile_array->fd_array[qfile_array->next_index]); |
|
+ |
|
+ spin_count++; |
|
+ |
|
+ } while ((qfile_fd == -1) && (spin_count < qfile_array->array_size)); |
|
+} |
|
+ |
|
+/* |
|
+ * This is a non-thread safe function to read query records |
|
+ * from a list of query files in a Round-Robin manner. |
|
+ * As in when the query files get exhuasted they are closed. |
|
+ * Returns: |
|
+ * 0 if all the query records in all the query files of the list are |
|
+ * exhausted. |
|
+ * > 0 if a query record is successfully read. Indicates the size of the query |
|
+ * record read. |
|
+ * < 0 if there was failure |
|
+ * */ |
|
+static int |
|
+read_query_record_list(tier_qfile_array_t *qfile_array, |
|
+ gfdb_query_record_t **query_record) |
|
+{ |
|
+ int ret = -1; |
|
+ int qfile_fd = 0; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", qfile_array, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", qfile_array->fd_array, out); |
|
+ |
|
+ do { |
|
+ if (is_qfile_array_empty(qfile_array)) { |
|
+ ret = 0; |
|
+ break; |
|
+ } |
|
+ |
|
+ qfile_fd = qfile_array->fd_array[qfile_array->next_index]; |
|
+ ret = gfdb_methods.gfdb_read_query_record(qfile_fd, query_record); |
|
+ if (ret <= 0) { |
|
+ /*The qfile_fd has reached EOF or |
|
+ * there was an error. |
|
+ * 1. Close the exhausted fd |
|
+ * 2. increment the exhausted count |
|
+ * 3. shift next_qfile to next qfile |
|
+ **/ |
|
+ sys_close(qfile_fd); |
|
+ qfile_array->fd_array[qfile_array->next_index] = -1; |
|
+ qfile_array->exhausted_count++; |
|
+ /* shift next_qfile to next qfile */ |
|
+ shift_next_index(qfile_array); |
|
+ continue; |
|
+ } else { |
|
+ /* shift next_qfile to next qfile */ |
|
+ shift_next_index(qfile_array); |
|
+ break; |
|
+ } |
|
+ } while (1); |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/* Check and update the watermark every WM_INTERVAL seconds */ |
|
+#define WM_INTERVAL 5 |
|
+#define WM_INTERVAL_EMERG 1 |
|
+ |
|
+static int |
|
+tier_check_same_node(xlator_t *this, loc_t *loc, gf_defrag_info_t *defrag) |
|
+{ |
|
+ int ret = -1; |
|
+ dict_t *dict = NULL; |
|
+ char *uuid_str = NULL; |
|
+ uuid_t node_uuid = { |
|
+ 0, |
|
+ }; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, loc, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, defrag, out); |
|
+ |
|
+ if (syncop_getxattr(this, loc, &dict, GF_XATTR_NODE_UUID_KEY, NULL, NULL)) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Unable to get NODE_UUID_KEY %s %s\n", loc->name, loc->path); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (dict_get_str(dict, GF_XATTR_NODE_UUID_KEY, &uuid_str) < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to get node-uuids for %s", loc->path); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (gf_uuid_parse(uuid_str, node_uuid)) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "uuid_parse failed for %s", loc->path); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (gf_uuid_compare(node_uuid, defrag->node_uuid)) { |
|
+ gf_msg_debug(this->name, 0, "%s does not belong to this node", |
|
+ loc->path); |
|
+ ret = 1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ if (dict) |
|
+ dict_unref(dict); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_get_fs_stat(xlator_t *this, loc_t *root_loc) |
|
+{ |
|
+ int ret = 0; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ dict_t *xdata = NULL; |
|
+ struct statvfs statfs = { |
|
+ 0, |
|
+ }; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ |
|
+ conf = this->private; |
|
+ if (!conf) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "conf is NULL"); |
|
+ ret = -1; |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ defrag = conf->defrag; |
|
+ if (!defrag) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "defrag is NULL"); |
|
+ ret = -1; |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ tier_conf = &defrag->tier_conf; |
|
+ |
|
+ xdata = dict_new(); |
|
+ if (!xdata) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, |
|
+ "failed to allocate dictionary"); |
|
+ ret = -1; |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, |
|
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict"); |
|
+ ret = -1; |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ /* Find how much free space is on the hot subvolume. |
|
+ * Then see if that value */ |
|
+ /* is less than or greater than user defined watermarks. |
|
+ * Stash results in */ |
|
+ /* the tier_conf data structure. */ |
|
+ |
|
+ ret = syncop_statfs(conf->subvolumes[1], root_loc, &statfs, xdata, NULL); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Unable to obtain statfs."); |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ |
|
+ tier_conf->block_size = statfs.f_bsize; |
|
+ tier_conf->blocks_total = statfs.f_blocks; |
|
+ tier_conf->blocks_used = statfs.f_blocks - statfs.f_bfree; |
|
+ |
|
+ tier_conf->percent_full = GF_PERCENTAGE(tier_conf->blocks_used, |
|
+ statfs.f_blocks); |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ |
|
+exit: |
|
+ if (xdata) |
|
+ dict_unref(xdata); |
|
+ return ret; |
|
+} |
|
+ |
|
+static void |
|
+tier_send_watermark_event(const char *volname, tier_watermark_op_t old_wm, |
|
+ tier_watermark_op_t new_wm) |
|
+{ |
|
+ if (old_wm == TIER_WM_LOW || old_wm == TIER_WM_NONE) { |
|
+ if (new_wm == TIER_WM_MID) { |
|
+ gf_event(EVENT_TIER_WATERMARK_RAISED_TO_MID, "vol=%s", volname); |
|
+ } else if (new_wm == TIER_WM_HI) { |
|
+ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); |
|
+ } |
|
+ } else if (old_wm == TIER_WM_MID) { |
|
+ if (new_wm == TIER_WM_LOW) { |
|
+ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); |
|
+ } else if (new_wm == TIER_WM_HI) { |
|
+ gf_event(EVENT_TIER_WATERMARK_HI, "vol=%s", volname); |
|
+ } |
|
+ } else if (old_wm == TIER_WM_HI) { |
|
+ if (new_wm == TIER_WM_MID) { |
|
+ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_MID, "vol=%s", volname); |
|
+ } else if (new_wm == TIER_WM_LOW) { |
|
+ gf_event(EVENT_TIER_WATERMARK_DROPPED_TO_LOW, "vol=%s", volname); |
|
+ } |
|
+ } |
|
+} |
|
+ |
|
+int |
|
+tier_check_watermark(xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ tier_watermark_op_t wm = TIER_WM_NONE; |
|
+ |
|
+ conf = this->private; |
|
+ if (!conf) |
|
+ goto exit; |
|
+ |
|
+ defrag = conf->defrag; |
|
+ if (!defrag) |
|
+ goto exit; |
|
+ |
|
+ tier_conf = &defrag->tier_conf; |
|
+ |
|
+ if (tier_conf->percent_full < tier_conf->watermark_low) { |
|
+ wm = TIER_WM_LOW; |
|
+ |
|
+ } else if (tier_conf->percent_full < tier_conf->watermark_hi) { |
|
+ wm = TIER_WM_MID; |
|
+ |
|
+ } else { |
|
+ wm = TIER_WM_HI; |
|
+ } |
|
+ |
|
+ if (wm != tier_conf->watermark_last) { |
|
+ tier_send_watermark_event(tier_conf->volname, tier_conf->watermark_last, |
|
+ wm); |
|
+ |
|
+ tier_conf->watermark_last = wm; |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Tier watermark now %d", wm); |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+exit: |
|
+ return ret; |
|
+} |
|
+ |
|
+static gf_boolean_t |
|
+is_hot_tier_full(gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ if (tier_conf && (tier_conf->mode == TIER_MODE_WM) && |
|
+ (tier_conf->watermark_last == TIER_WM_HI)) |
|
+ return _gf_true; |
|
+ |
|
+ return _gf_false; |
|
+} |
|
+ |
|
+int |
|
+tier_do_migration(xlator_t *this, int promote) |
|
+{ |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ long rand = 0; |
|
+ int migrate = 0; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ |
|
+ conf = this->private; |
|
+ if (!conf) |
|
+ goto exit; |
|
+ |
|
+ defrag = conf->defrag; |
|
+ if (!defrag) |
|
+ goto exit; |
|
+ |
|
+ if (tier_check_watermark(this) != 0) { |
|
+ gf_msg(this->name, GF_LOG_CRITICAL, errno, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to get watermark"); |
|
+ goto exit; |
|
+ } |
|
+ |
|
+ tier_conf = &defrag->tier_conf; |
|
+ |
|
+ switch (tier_conf->watermark_last) { |
|
+ case TIER_WM_LOW: |
|
+ migrate = promote ? 1 : 0; |
|
+ break; |
|
+ case TIER_WM_HI: |
|
+ migrate = promote ? 0 : 1; |
|
+ break; |
|
+ case TIER_WM_MID: |
|
+ /* coverity[DC.WEAK_CRYPTO] */ |
|
+ rand = random() % 100; |
|
+ if (promote) { |
|
+ migrate = (rand > tier_conf->percent_full); |
|
+ } else { |
|
+ migrate = (rand <= tier_conf->percent_full); |
|
+ } |
|
+ break; |
|
+ } |
|
+ |
|
+exit: |
|
+ return migrate; |
|
+} |
|
+ |
|
+int |
|
+tier_migrate(xlator_t *this, int is_promotion, dict_t *migrate_data, loc_t *loc, |
|
+ gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ pthread_mutex_lock(&tier_conf->pause_mutex); |
|
+ if (is_promotion) |
|
+ tier_conf->promote_in_progress = 1; |
|
+ else |
|
+ tier_conf->demote_in_progress = 1; |
|
+ pthread_mutex_unlock(&tier_conf->pause_mutex); |
|
+ |
|
+ /* Data migration */ |
|
+ ret = syncop_setxattr(this, loc, migrate_data, 0, NULL, NULL); |
|
+ |
|
+ pthread_mutex_lock(&tier_conf->pause_mutex); |
|
+ if (is_promotion) |
|
+ tier_conf->promote_in_progress = 0; |
|
+ else |
|
+ tier_conf->demote_in_progress = 0; |
|
+ pthread_mutex_unlock(&tier_conf->pause_mutex); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* returns _gf_true: if file can be promoted |
|
+ * returns _gf_false: if file cannot be promoted |
|
+ */ |
|
+static gf_boolean_t |
|
+tier_can_promote_file(xlator_t *this, char const *file_name, |
|
+ struct iatt *current, gf_defrag_info_t *defrag) |
|
+{ |
|
+ gf_boolean_t ret = _gf_false; |
|
+ fsblkcnt_t estimated_usage = 0; |
|
+ |
|
+ if (defrag->tier_conf.tier_max_promote_size && |
|
+ (current->ia_size > defrag->tier_conf.tier_max_promote_size)) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "File %s (gfid:%s) with size (%" PRIu64 |
|
+ ") exceeds maxsize " |
|
+ "(%d) for promotion. File will not be promoted.", |
|
+ file_name, uuid_utoa(current->ia_gfid), current->ia_size, |
|
+ defrag->tier_conf.tier_max_promote_size); |
|
+ goto err; |
|
+ } |
|
+ |
|
+ /* bypass further validations for TEST mode */ |
|
+ if (defrag->tier_conf.mode != TIER_MODE_WM) { |
|
+ ret = _gf_true; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ /* convert the file size to blocks as per the block size of the |
|
+ * destination tier |
|
+ * NOTE: add (block_size - 1) to get the correct block size when |
|
+ * there is a remainder after a modulo |
|
+ */ |
|
+ estimated_usage = ((current->ia_size + defrag->tier_conf.block_size - 1) / |
|
+ defrag->tier_conf.block_size) + |
|
+ defrag->tier_conf.blocks_used; |
|
+ |
|
+ /* test if the estimated block usage goes above HI watermark */ |
|
+ if (GF_PERCENTAGE(estimated_usage, defrag->tier_conf.blocks_total) >= |
|
+ defrag->tier_conf.watermark_hi) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Estimated block count consumption on " |
|
+ "hot tier (%" PRIu64 |
|
+ ") exceeds hi watermark (%d%%). " |
|
+ "File will not be promoted.", |
|
+ estimated_usage, defrag->tier_conf.watermark_hi); |
|
+ goto err; |
|
+ } |
|
+ ret = _gf_true; |
|
+err: |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_set_migrate_data(dict_t *migrate_data) |
|
+{ |
|
+ int failed = 1; |
|
+ |
|
+ failed = dict_set_str(migrate_data, GF_XATTR_FILE_MIGRATE_KEY, "force"); |
|
+ if (failed) { |
|
+ goto bail_out; |
|
+ } |
|
+ |
|
+ /* Flag to suggest the xattr call is from migrator */ |
|
+ failed = dict_set_str(migrate_data, "from.migrator", "yes"); |
|
+ if (failed) { |
|
+ goto bail_out; |
|
+ } |
|
+ |
|
+ /* Flag to suggest its a tiering migration |
|
+ * The reason for this dict key-value is that |
|
+ * promotions and demotions are multithreaded |
|
+ * so the original frame from gf_defrag_start() |
|
+ * is not carried. A new frame will be created when |
|
+ * we do syncop_setxattr(). This does not have the |
|
+ * frame->root->pid of the original frame. So we pass |
|
+ * this dict key-value when we do syncop_setxattr() to do |
|
+ * data migration and set the frame->root->pid to |
|
+ * GF_CLIENT_PID_TIER_DEFRAG in dht_setxattr() just before |
|
+ * calling dht_start_rebalance_task() */ |
|
+ failed = dict_set_str(migrate_data, TIERING_MIGRATION_KEY, "yes"); |
|
+ if (failed) { |
|
+ goto bail_out; |
|
+ } |
|
+ |
|
+ failed = 0; |
|
+ |
|
+bail_out: |
|
+ return failed; |
|
+} |
|
+ |
|
+static char * |
|
+tier_get_parent_path(xlator_t *this, loc_t *p_loc, struct iatt *par_stbuf, |
|
+ int *per_link_status) |
|
+{ |
|
+ int ret = -1; |
|
+ char *parent_path = NULL; |
|
+ dict_t *xdata_request = NULL; |
|
+ dict_t *xdata_response = NULL; |
|
+ |
|
+ xdata_request = dict_new(); |
|
+ if (!xdata_request) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to create xdata_request dict"); |
|
+ goto err; |
|
+ } |
|
+ ret = dict_set_int32(xdata_request, GET_ANCESTRY_PATH_KEY, 42); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to set value to dict : key %s \n", |
|
+ GET_ANCESTRY_PATH_KEY); |
|
+ goto err; |
|
+ } |
|
+ |
|
+ ret = syncop_lookup(this, p_loc, par_stbuf, NULL, xdata_request, |
|
+ &xdata_response); |
|
+ /* When the parent gfid is a stale entry, the lookup |
|
+ * will fail and stop the demotion process. |
|
+ * The parent gfid can be stale when a huge folder is |
|
+ * deleted while the files within it are being migrated |
|
+ */ |
|
+ if (ret == -ESTALE) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, |
|
+ "Stale entry in parent lookup for %s", uuid_utoa(p_loc->gfid)); |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } else if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Error in parent lookup for %s", uuid_utoa(p_loc->gfid)); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ ret = dict_get_str(xdata_response, GET_ANCESTRY_PATH_KEY, &parent_path); |
|
+ if (ret || !parent_path) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to get parent path for %s", uuid_utoa(p_loc->gfid)); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+err: |
|
+ if (xdata_request) { |
|
+ dict_unref(xdata_request); |
|
+ } |
|
+ |
|
+ if (xdata_response) { |
|
+ dict_unref(xdata_response); |
|
+ xdata_response = NULL; |
|
+ } |
|
+ |
|
+ return parent_path; |
|
+} |
|
+ |
|
+static int |
|
+tier_get_file_name_and_path(xlator_t *this, uuid_t gfid, |
|
+ gfdb_link_info_t *link_info, |
|
+ char const *parent_path, loc_t *loc, |
|
+ int *per_link_status) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ loc->name = gf_strdup(link_info->file_name); |
|
+ if (!loc->name) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Memory " |
|
+ "allocation failed for %s", |
|
+ uuid_utoa(gfid)); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ ret = gf_asprintf((char **)&(loc->path), "%s/%s", parent_path, loc->name); |
|
+ if (ret < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to " |
|
+ "construct file path for %s %s\n", |
|
+ parent_path, loc->name); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+err: |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_lookup_file(xlator_t *this, loc_t *p_loc, loc_t *loc, struct iatt *current, |
|
+ int *per_link_status) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ ret = syncop_lookup(this, loc, current, NULL, NULL, NULL); |
|
+ |
|
+ /* The file may be deleted even when the parent |
|
+ * is available and the lookup will |
|
+ * return a stale entry which would stop the |
|
+ * migration. so if its a stale entry, then skip |
|
+ * the file and keep migrating. |
|
+ */ |
|
+ if (ret == -ESTALE) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_STALE_LOOKUP, |
|
+ "Stale lookup for %s", uuid_utoa(p_loc->gfid)); |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } else if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to " |
|
+ "lookup file %s\n", |
|
+ loc->name); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ ret = 0; |
|
+ |
|
+err: |
|
+ return ret; |
|
+} |
|
+ |
|
+static gf_boolean_t |
|
+tier_is_file_already_at_destination(xlator_t *src_subvol, |
|
+ query_cbk_args_t *query_cbk_args, |
|
+ dht_conf_t *conf, int *per_link_status) |
|
+{ |
|
+ gf_boolean_t at_destination = _gf_true; |
|
+ |
|
+ if (src_subvol == NULL) { |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } |
|
+ if (query_cbk_args->is_promotion && src_subvol == conf->subvolumes[1]) { |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ if (!query_cbk_args->is_promotion && src_subvol == conf->subvolumes[0]) { |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } |
|
+ at_destination = _gf_false; |
|
+ |
|
+err: |
|
+ return at_destination; |
|
+} |
|
+ |
|
+static void |
|
+tier_update_migration_counters(query_cbk_args_t *query_cbk_args, |
|
+ gf_defrag_info_t *defrag, |
|
+ uint64_t *total_migrated_bytes, int *total_files) |
|
+{ |
|
+ if (query_cbk_args->is_promotion) { |
|
+ defrag->total_files_promoted++; |
|
+ *total_migrated_bytes += defrag->tier_conf.st_last_promoted_size; |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->tier_conf.blocks_used += defrag->tier_conf |
|
+ .st_last_promoted_size; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } else { |
|
+ defrag->total_files_demoted++; |
|
+ *total_migrated_bytes += defrag->tier_conf.st_last_demoted_size; |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->tier_conf.blocks_used -= defrag->tier_conf.st_last_demoted_size; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } |
|
+ if (defrag->tier_conf.blocks_total) { |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->tier_conf.percent_full = GF_PERCENTAGE( |
|
+ defrag->tier_conf.blocks_used, defrag->tier_conf.blocks_total); |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } |
|
+ |
|
+ (*total_files)++; |
|
+} |
|
+ |
|
+static int |
|
+tier_migrate_link(xlator_t *this, dht_conf_t *conf, uuid_t gfid, |
|
+ gfdb_link_info_t *link_info, gf_defrag_info_t *defrag, |
|
+ query_cbk_args_t *query_cbk_args, dict_t *migrate_data, |
|
+ int *per_link_status, int *total_files, |
|
+ uint64_t *total_migrated_bytes) |
|
+{ |
|
+ int ret = -1; |
|
+ struct iatt current = { |
|
+ 0, |
|
+ }; |
|
+ struct iatt par_stbuf = { |
|
+ 0, |
|
+ }; |
|
+ loc_t p_loc = { |
|
+ 0, |
|
+ }; |
|
+ loc_t loc = { |
|
+ 0, |
|
+ }; |
|
+ xlator_t *src_subvol = NULL; |
|
+ inode_t *linked_inode = NULL; |
|
+ char *parent_path = NULL; |
|
+ |
|
+ /* Lookup for parent and get the path of parent */ |
|
+ gf_uuid_copy(p_loc.gfid, link_info->pargfid); |
|
+ p_loc.inode = inode_new(defrag->root_inode->table); |
|
+ if (!p_loc.inode) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to create reference to inode" |
|
+ " for %s", |
|
+ uuid_utoa(p_loc.gfid)); |
|
+ |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ parent_path = tier_get_parent_path(this, &p_loc, &par_stbuf, |
|
+ per_link_status); |
|
+ if (!parent_path) { |
|
+ goto err; |
|
+ } |
|
+ |
|
+ linked_inode = inode_link(p_loc.inode, NULL, NULL, &par_stbuf); |
|
+ inode_unref(p_loc.inode); |
|
+ p_loc.inode = linked_inode; |
|
+ |
|
+ /* Preparing File Inode */ |
|
+ gf_uuid_copy(loc.gfid, gfid); |
|
+ loc.inode = inode_new(defrag->root_inode->table); |
|
+ gf_uuid_copy(loc.pargfid, link_info->pargfid); |
|
+ loc.parent = inode_ref(p_loc.inode); |
|
+ |
|
+ /* Get filename and Construct file path */ |
|
+ if (tier_get_file_name_and_path(this, gfid, link_info, parent_path, &loc, |
|
+ per_link_status) != 0) { |
|
+ goto err; |
|
+ } |
|
+ gf_uuid_copy(loc.parent->gfid, link_info->pargfid); |
|
+ |
|
+ /* lookup file inode */ |
|
+ if (tier_lookup_file(this, &p_loc, &loc, ¤t, per_link_status) != 0) { |
|
+ goto err; |
|
+ } |
|
+ |
|
+ if (query_cbk_args->is_promotion) { |
|
+ if (!tier_can_promote_file(this, link_info->file_name, ¤t, |
|
+ defrag)) { |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } |
|
+ } |
|
+ |
|
+ linked_inode = inode_link(loc.inode, NULL, NULL, ¤t); |
|
+ inode_unref(loc.inode); |
|
+ loc.inode = linked_inode; |
|
+ |
|
+ /* |
|
+ * Do not promote/demote if file already is where it |
|
+ * should be. It means another brick moved the file |
|
+ * so is not an error. So we set per_link_status = 1 |
|
+ * so that we ignore counting this. |
|
+ */ |
|
+ src_subvol = dht_subvol_get_cached(this, loc.inode); |
|
+ |
|
+ if (tier_is_file_already_at_destination(src_subvol, query_cbk_args, conf, |
|
+ per_link_status)) { |
|
+ goto err; |
|
+ } |
|
+ |
|
+ gf_msg_debug(this->name, 0, "Tier %s: src_subvol %s file %s", |
|
+ (query_cbk_args->is_promotion ? "promote" : "demote"), |
|
+ src_subvol->name, loc.path); |
|
+ |
|
+ ret = tier_check_same_node(this, &loc, defrag); |
|
+ if (ret != 0) { |
|
+ if (ret < 0) { |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ ret = 0; |
|
+ /* By setting per_link_status to 1 we are |
|
+ * ignoring this status and will not be counting |
|
+ * this file for migration */ |
|
+ *per_link_status = 1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ gf_uuid_copy(loc.gfid, loc.inode->gfid); |
|
+ |
|
+ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Tiering paused. " |
|
+ "Exiting tier_migrate_link"); |
|
+ goto err; |
|
+ } |
|
+ |
|
+ ret = tier_migrate(this, query_cbk_args->is_promotion, migrate_data, &loc, |
|
+ &defrag->tier_conf); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to " |
|
+ "migrate %s ", |
|
+ loc.path); |
|
+ *per_link_status = -1; |
|
+ goto err; |
|
+ } |
|
+ |
|
+ tier_update_migration_counters(query_cbk_args, defrag, total_migrated_bytes, |
|
+ total_files); |
|
+ |
|
+ ret = 0; |
|
+ |
|
+err: |
|
+ GF_FREE((char *)loc.name); |
|
+ loc.name = NULL; |
|
+ loc_wipe(&loc); |
|
+ loc_wipe(&p_loc); |
|
+ |
|
+ if ((*total_files >= defrag->tier_conf.max_migrate_files) || |
|
+ (*total_migrated_bytes > defrag->tier_conf.max_migrate_bytes)) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Reached cycle migration limit." |
|
+ "migrated bytes %" PRId64 " files %d", |
|
+ *total_migrated_bytes, *total_files); |
|
+ ret = -1; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_migrate_using_query_file(void *_args) |
|
+{ |
|
+ int ret = -1; |
|
+ query_cbk_args_t *query_cbk_args = (query_cbk_args_t *)_args; |
|
+ xlator_t *this = NULL; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ gfdb_query_record_t *query_record = NULL; |
|
+ gfdb_link_info_t *link_info = NULL; |
|
+ dict_t *migrate_data = NULL; |
|
+ /* |
|
+ * per_file_status and per_link_status |
|
+ * 0 : success |
|
+ * -1 : failure |
|
+ * 1 : ignore the status and don't count for migration |
|
+ * */ |
|
+ int per_file_status = 0; |
|
+ int per_link_status = 0; |
|
+ int total_status = 0; |
|
+ dht_conf_t *conf = NULL; |
|
+ uint64_t total_migrated_bytes = 0; |
|
+ int total_files = 0; |
|
+ loc_t root_loc = {0}; |
|
+ gfdb_time_t start_time = {0}; |
|
+ gfdb_time_t current_time = {0}; |
|
+ int total_time = 0; |
|
+ int max_time = 0; |
|
+ gf_boolean_t emergency_demote_mode = _gf_false; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); |
|
+ this = query_cbk_args->this; |
|
+ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->defrag, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, query_cbk_args->qfile_array, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ defrag = query_cbk_args->defrag; |
|
+ migrate_data = dict_new(); |
|
+ if (!migrate_data) |
|
+ goto out; |
|
+ |
|
+ emergency_demote_mode = (!query_cbk_args->is_promotion && |
|
+ is_hot_tier_full(&defrag->tier_conf)); |
|
+ |
|
+ if (tier_set_migrate_data(migrate_data) != 0) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ dht_build_root_loc(defrag->root_inode, &root_loc); |
|
+ |
|
+ ret = gettimeofday(&start_time, NULL); |
|
+ if (query_cbk_args->is_promotion) { |
|
+ max_time = defrag->tier_conf.tier_promote_frequency; |
|
+ } else { |
|
+ max_time = defrag->tier_conf.tier_demote_frequency; |
|
+ } |
|
+ |
|
+ /* Per file */ |
|
+ while ((ret = read_query_record_list(query_cbk_args->qfile_array, |
|
+ &query_record)) != 0) { |
|
+ if (ret < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to fetch query record " |
|
+ "from query file"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { |
|
+ ret = -1; |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Exiting tier migration as" |
|
+ "defrag status is not started"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Could not get current time."); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ total_time = current_time.tv_sec - start_time.tv_sec; |
|
+ if (total_time > max_time) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Max cycle time reached. Exiting migration."); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ per_file_status = 0; |
|
+ per_link_status = 0; |
|
+ |
|
+ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Tiering paused. " |
|
+ "Exiting tier_migrate_using_query_file"); |
|
+ break; |
|
+ } |
|
+ |
|
+ if (defrag->tier_conf.mode == TIER_MODE_WM) { |
|
+ ret = tier_get_fs_stat(this, &root_loc); |
|
+ if (ret != 0) { |
|
+ gfdb_methods.gfdb_query_record_free(query_record); |
|
+ query_record = NULL; |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "tier_get_fs_stat() FAILED ... " |
|
+ "skipping file migrations until next cycle"); |
|
+ break; |
|
+ } |
|
+ |
|
+ if (!tier_do_migration(this, query_cbk_args->is_promotion)) { |
|
+ gfdb_methods.gfdb_query_record_free(query_record); |
|
+ query_record = NULL; |
|
+ |
|
+ /* We have crossed the high watermark. Stop processing |
|
+ * files if this is a promotion cycle so demotion gets |
|
+ * a chance to start if not already running*/ |
|
+ |
|
+ if (query_cbk_args->is_promotion && |
|
+ is_hot_tier_full(&defrag->tier_conf)) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "High watermark crossed during " |
|
+ "promotion. Exiting " |
|
+ "tier_migrate_using_query_file"); |
|
+ break; |
|
+ } |
|
+ continue; |
|
+ } |
|
+ } |
|
+ |
|
+ per_link_status = 0; |
|
+ |
|
+ /* For now we only support single link migration. And we will |
|
+ * ignore other hard links in the link info list of query record |
|
+ * TODO: Multiple hard links migration */ |
|
+ if (!list_empty(&query_record->link_list)) { |
|
+ link_info = list_first_entry(&query_record->link_list, |
|
+ gfdb_link_info_t, list); |
|
+ } |
|
+ if (link_info != NULL) { |
|
+ if (tier_migrate_link(this, conf, query_record->gfid, link_info, |
|
+ defrag, query_cbk_args, migrate_data, |
|
+ &per_link_status, &total_files, |
|
+ &total_migrated_bytes) != 0) { |
|
+ gf_msg( |
|
+ this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "%s failed for %s(gfid:%s)", |
|
+ (query_cbk_args->is_promotion ? "Promotion" : "Demotion"), |
|
+ link_info->file_name, uuid_utoa(query_record->gfid)); |
|
+ } |
|
+ } |
|
+ per_file_status = per_link_status; |
|
+ |
|
+ if (per_file_status < 0) { /* Failure */ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->total_failures++; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } else if (per_file_status == 0) { /* Success */ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->total_files++; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } else if (per_file_status == 1) { /* Ignore */ |
|
+ per_file_status = 0; |
|
+ /* Since this attempt was ignored we |
|
+ * decrement the lookup count*/ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ defrag->num_files_lookedup--; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ } |
|
+ total_status = total_status + per_file_status; |
|
+ per_link_status = 0; |
|
+ per_file_status = 0; |
|
+ |
|
+ gfdb_methods.gfdb_query_record_free(query_record); |
|
+ query_record = NULL; |
|
+ |
|
+ /* If we are demoting and the entry watermark was HI, then |
|
+ * we are done with emergency demotions if the current |
|
+ * watermark has fallen below hi-watermark level |
|
+ */ |
|
+ if (emergency_demote_mode) { |
|
+ if (tier_check_watermark(this) == 0) { |
|
+ if (!is_hot_tier_full(&defrag->tier_conf)) { |
|
+ break; |
|
+ } |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ if (migrate_data) |
|
+ dict_unref(migrate_data); |
|
+ |
|
+ gfdb_methods.gfdb_query_record_free(query_record); |
|
+ query_record = NULL; |
|
+ |
|
+ return total_status; |
|
+} |
|
+ |
|
+/* This is the call back function per record/file from data base */ |
|
+static int |
|
+tier_gf_query_callback(gfdb_query_record_t *gfdb_query_record, void *_args) |
|
+{ |
|
+ int ret = -1; |
|
+ query_cbk_args_t *query_cbk_args = _args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->defrag, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", (query_cbk_args->query_fd > 0), out); |
|
+ |
|
+ ret = gfdb_methods.gfdb_write_query_record(query_cbk_args->query_fd, |
|
+ gfdb_query_record); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed writing query record to query file"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ query_cbk_args->defrag->num_files_lookedup++; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/* Create query file in tier process */ |
|
+static int |
|
+tier_process_self_query(tier_brick_list_t *local_brick, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ char *db_path = NULL; |
|
+ query_cbk_args_t *query_cbk_args = NULL; |
|
+ xlator_t *this = NULL; |
|
+ gfdb_conn_node_t *conn_node = NULL; |
|
+ dict_t *params_dict = NULL; |
|
+ dict_t *ctr_ipc_dict = NULL; |
|
+ gfdb_brick_info_t *gfdb_brick_info = args; |
|
+ |
|
+ /*Init of all the essentials*/ |
|
+ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); |
|
+ query_cbk_args = gfdb_brick_info->_query_cbk_args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); |
|
+ this = query_cbk_args->this; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); |
|
+ |
|
+ db_path = local_brick->brick_db_path; |
|
+ |
|
+ /*Preparing DB parameters before init_db i.e getting db connection*/ |
|
+ params_dict = dict_new(); |
|
+ if (!params_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "DB Params cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ SET_DB_PARAM_TO_DICT(this->name, params_dict, |
|
+ (char *)gfdb_methods.get_db_path_key(), db_path, ret, |
|
+ out); |
|
+ |
|
+ /*Get the db connection*/ |
|
+ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); |
|
+ if (!conn_node) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "FATAL: Failed initializing db operations"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Query for eligible files from db */ |
|
+ query_cbk_args->query_fd = open(local_brick->qfile_path, |
|
+ O_WRONLY | O_CREAT | O_APPEND, |
|
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); |
|
+ if (query_cbk_args->query_fd < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to open query file %s", local_brick->qfile_path); |
|
+ goto out; |
|
+ } |
|
+ if (!gfdb_brick_info->_gfdb_promote) { |
|
+ if (query_cbk_args->defrag->tier_conf.watermark_last == TIER_WM_HI) { |
|
+ /* emergency demotion mode */ |
|
+ ret = gfdb_methods.find_all( |
|
+ conn_node, tier_gf_query_callback, (void *)query_cbk_args, |
|
+ query_cbk_args->defrag->tier_conf.query_limit); |
|
+ } else { |
|
+ if (query_cbk_args->defrag->write_freq_threshold == 0 && |
|
+ query_cbk_args->defrag->read_freq_threshold == 0) { |
|
+ ret = gfdb_methods.find_unchanged_for_time( |
|
+ conn_node, tier_gf_query_callback, (void *)query_cbk_args, |
|
+ gfdb_brick_info->time_stamp); |
|
+ } else { |
|
+ ret = gfdb_methods.find_unchanged_for_time_freq( |
|
+ conn_node, tier_gf_query_callback, (void *)query_cbk_args, |
|
+ gfdb_brick_info->time_stamp, |
|
+ query_cbk_args->defrag->write_freq_threshold, |
|
+ query_cbk_args->defrag->read_freq_threshold, _gf_false); |
|
+ } |
|
+ } |
|
+ } else { |
|
+ if (query_cbk_args->defrag->write_freq_threshold == 0 && |
|
+ query_cbk_args->defrag->read_freq_threshold == 0) { |
|
+ ret = gfdb_methods.find_recently_changed_files( |
|
+ conn_node, tier_gf_query_callback, (void *)query_cbk_args, |
|
+ gfdb_brick_info->time_stamp); |
|
+ } else { |
|
+ ret = gfdb_methods.find_recently_changed_files_freq( |
|
+ conn_node, tier_gf_query_callback, (void *)query_cbk_args, |
|
+ gfdb_brick_info->time_stamp, |
|
+ query_cbk_args->defrag->write_freq_threshold, |
|
+ query_cbk_args->defrag->read_freq_threshold, _gf_false); |
|
+ } |
|
+ } |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "FATAL: query from db failed"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Clear the heat on the DB entries*/ |
|
+ /*Preparing ctr_ipc_dict*/ |
|
+ ctr_ipc_dict = dict_new(); |
|
+ if (!ctr_ipc_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "ctr_ipc_dict cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, |
|
+ GFDB_IPC_CTR_CLEAR_OPS, ret, out); |
|
+ |
|
+ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, |
|
+ NULL); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed clearing the heat " |
|
+ "on db %s error %d", |
|
+ local_brick->brick_db_path, ret); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ if (params_dict) { |
|
+ dict_unref(params_dict); |
|
+ params_dict = NULL; |
|
+ } |
|
+ |
|
+ if (ctr_ipc_dict) { |
|
+ dict_unref(ctr_ipc_dict); |
|
+ ctr_ipc_dict = NULL; |
|
+ } |
|
+ |
|
+ if (query_cbk_args && query_cbk_args->query_fd >= 0) { |
|
+ sys_close(query_cbk_args->query_fd); |
|
+ query_cbk_args->query_fd = -1; |
|
+ } |
|
+ gfdb_methods.fini_db(conn_node); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/*Ask CTR to create the query file*/ |
|
+static int |
|
+tier_process_ctr_query(tier_brick_list_t *local_brick, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ query_cbk_args_t *query_cbk_args = NULL; |
|
+ xlator_t *this = NULL; |
|
+ dict_t *ctr_ipc_in_dict = NULL; |
|
+ dict_t *ctr_ipc_out_dict = NULL; |
|
+ gfdb_brick_info_t *gfdb_brick_info = args; |
|
+ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; |
|
+ int count = 0; |
|
+ |
|
+ /*Init of all the essentials*/ |
|
+ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); |
|
+ query_cbk_args = gfdb_brick_info->_query_cbk_args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); |
|
+ this = query_cbk_args->this; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); |
|
+ |
|
+ /*Preparing ctr_ipc_in_dict*/ |
|
+ ctr_ipc_in_dict = dict_new(); |
|
+ if (!ctr_ipc_in_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "ctr_ipc_in_dict cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ipc_ctr_params = GF_CALLOC(1, sizeof(gfdb_ipc_ctr_params_t), |
|
+ gf_tier_mt_ipc_ctr_params_t); |
|
+ if (!ipc_ctr_params) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* set all the query params*/ |
|
+ ipc_ctr_params->is_promote = gfdb_brick_info->_gfdb_promote; |
|
+ |
|
+ ipc_ctr_params->write_freq_threshold = query_cbk_args->defrag |
|
+ ->write_freq_threshold; |
|
+ |
|
+ ipc_ctr_params->read_freq_threshold = query_cbk_args->defrag |
|
+ ->read_freq_threshold; |
|
+ |
|
+ ipc_ctr_params->query_limit = query_cbk_args->defrag->tier_conf.query_limit; |
|
+ |
|
+ ipc_ctr_params->emergency_demote = (!gfdb_brick_info->_gfdb_promote && |
|
+ query_cbk_args->defrag->tier_conf |
|
+ .watermark_last == TIER_WM_HI); |
|
+ |
|
+ memcpy(&ipc_ctr_params->time_stamp, gfdb_brick_info->time_stamp, |
|
+ sizeof(gfdb_time_t)); |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, |
|
+ GFDB_IPC_CTR_QUERY_OPS, ret, out); |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_in_dict, |
|
+ GFDB_IPC_CTR_GET_QFILE_PATH, local_brick->qfile_path, |
|
+ ret, out); |
|
+ |
|
+ ret = dict_set_bin(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, |
|
+ ipc_ctr_params, sizeof(*ipc_ctr_params)); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed setting %s to params dictionary", |
|
+ GFDB_IPC_CTR_GET_QUERY_PARAMS); |
|
+ GF_FREE(ipc_ctr_params); |
|
+ goto out; |
|
+ } |
|
+ ipc_ctr_params = NULL; |
|
+ |
|
+ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_in_dict, |
|
+ &ctr_ipc_out_dict); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_IPC_TIER_ERROR, |
|
+ "Failed query on %s ret %d", local_brick->brick_db_path, ret); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_get_int32(ctr_ipc_out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, |
|
+ &count); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed getting count " |
|
+ "of records on %s", |
|
+ local_brick->brick_db_path); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (count < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed query on %s", local_brick->brick_db_path); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ pthread_mutex_lock(&dm_stat_mutex); |
|
+ query_cbk_args->defrag->num_files_lookedup = count; |
|
+ pthread_mutex_unlock(&dm_stat_mutex); |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ |
|
+ if (ctr_ipc_in_dict) { |
|
+ dict_unref(ctr_ipc_in_dict); |
|
+ ctr_ipc_in_dict = NULL; |
|
+ } |
|
+ |
|
+ if (ctr_ipc_out_dict) { |
|
+ dict_unref(ctr_ipc_out_dict); |
|
+ ctr_ipc_out_dict = NULL; |
|
+ } |
|
+ |
|
+ GF_FREE(ipc_ctr_params); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* This is the call back function for each brick from hot/cold bricklist |
|
+ * It picks up each bricks db and queries for eligible files for migration. |
|
+ * The list of eligible files are populated in appropriate query files*/ |
|
+static int |
|
+tier_process_brick(tier_brick_list_t *local_brick, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ dict_t *ctr_ipc_in_dict = NULL; |
|
+ dict_t *ctr_ipc_out_dict = NULL; |
|
+ char *strval = NULL; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); |
|
+ |
|
+ if (dht_tier_db_type == GFDB_SQLITE3) { |
|
+ /*Preparing ctr_ipc_in_dict*/ |
|
+ ctr_ipc_in_dict = dict_new(); |
|
+ if (!ctr_ipc_in_dict) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "ctr_ipc_in_dict cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_KEY, |
|
+ GFDB_IPC_CTR_GET_DB_PARAM_OPS); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed to set %s " |
|
+ "to params dictionary", |
|
+ GFDB_IPC_CTR_KEY); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_PARAM_OPS, ""); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed to set %s " |
|
+ "to params dictionary", |
|
+ GFDB_IPC_CTR_GET_DB_PARAM_OPS); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_str(ctr_ipc_in_dict, GFDB_IPC_CTR_GET_DB_KEY, |
|
+ "journal_mode"); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed to set %s " |
|
+ "to params dictionary", |
|
+ GFDB_IPC_CTR_GET_DB_KEY); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, |
|
+ ctr_ipc_in_dict, &ctr_ipc_out_dict); |
|
+ if (ret || ctr_ipc_out_dict == NULL) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to get " |
|
+ "journal_mode of sql db %s", |
|
+ local_brick->brick_db_path); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_get_str(ctr_ipc_out_dict, "journal_mode", &strval); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_GET_PARAM_FAILED, |
|
+ "Failed to get %s " |
|
+ "from params dictionary" |
|
+ "journal_mode", |
|
+ strval); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (strval && (strncmp(strval, "wal", SLEN("wal")) == 0)) { |
|
+ ret = tier_process_self_query(local_brick, args); |
|
+ if (ret) { |
|
+ goto out; |
|
+ } |
|
+ } else { |
|
+ ret = tier_process_ctr_query(local_brick, args); |
|
+ if (ret) { |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ ret = 0; |
|
+ |
|
+ } else { |
|
+ ret = tier_process_self_query(local_brick, args); |
|
+ if (ret) { |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ if (ctr_ipc_in_dict) |
|
+ dict_unref(ctr_ipc_in_dict); |
|
+ |
|
+ if (ctr_ipc_out_dict) |
|
+ dict_unref(ctr_ipc_out_dict); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_build_migration_qfile(migration_args_t *args, |
|
+ query_cbk_args_t *query_cbk_args, |
|
+ gf_boolean_t is_promotion) |
|
+{ |
|
+ gfdb_time_t current_time; |
|
+ gfdb_brick_info_t gfdb_brick_info; |
|
+ gfdb_time_t time_in_past; |
|
+ int ret = -1; |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ int i = 0; |
|
+ time_in_past.tv_sec = args->freq_time; |
|
+ time_in_past.tv_usec = 0; |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_msg(args->this->name, GF_LOG_ERROR, errno, |
|
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); |
|
+ goto out; |
|
+ } |
|
+ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; |
|
+ |
|
+ /* The migration daemon may run a varying number of usec after the */ |
|
+ /* sleep call triggers. A file may be registered in CTR some number */ |
|
+ /* of usec X after the daemon started and missed in the subsequent */ |
|
+ /* cycle if the daemon starts Y usec after the period in seconds */ |
|
+ /* where Y>X. Normalize away this problem by always setting usec */ |
|
+ /* to 0. */ |
|
+ time_in_past.tv_usec = 0; |
|
+ |
|
+ gfdb_brick_info.time_stamp = &time_in_past; |
|
+ gfdb_brick_info._gfdb_promote = is_promotion; |
|
+ gfdb_brick_info._query_cbk_args = query_cbk_args; |
|
+ |
|
+ list_for_each_entry(local_brick, args->brick_list, list) |
|
+ { |
|
+ /* Construct query file path for this brick |
|
+ * i.e |
|
+ * /var/run/gluster/xlator_name/ |
|
+ * {promote/demote}-brickname-indexinbricklist |
|
+ * So that no two query files will have same path even |
|
+ * bricks have the same name |
|
+ * */ |
|
+ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", |
|
+ GET_QFILE_PATH(gfdb_brick_info._gfdb_promote), |
|
+ local_brick->brick_name, i); |
|
+ |
|
+ /* Delete any old query files for this brick */ |
|
+ sys_unlink(local_brick->qfile_path); |
|
+ |
|
+ ret = tier_process_brick(local_brick, &gfdb_brick_info); |
|
+ if (ret) { |
|
+ gf_msg(args->this->name, GF_LOG_ERROR, 0, |
|
+ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s query failed\n", |
|
+ local_brick->brick_db_path); |
|
+ } |
|
+ i++; |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_migrate_files_using_qfile(migration_args_t *comp, |
|
+ query_cbk_args_t *query_cbk_args) |
|
+{ |
|
+ int ret = -1; |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ tier_brick_list_t *temp = NULL; |
|
+ gfdb_time_t current_time = { |
|
+ 0, |
|
+ }; |
|
+ ssize_t qfile_array_size = 0; |
|
+ int count = 0; |
|
+ int temp_fd = 0; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ |
|
+ tier_conf = &(query_cbk_args->defrag->tier_conf); |
|
+ |
|
+ /* Time for error query files */ |
|
+ gettimeofday(¤t_time, NULL); |
|
+ |
|
+ /* Build the qfile list */ |
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) |
|
+ { |
|
+ qfile_array_size++; |
|
+ } |
|
+ query_cbk_args->qfile_array = qfile_array_new(qfile_array_size); |
|
+ if (!query_cbk_args->qfile_array) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to create new " |
|
+ "qfile_array"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Open all qfiles*/ |
|
+ count = 0; |
|
+ query_cbk_args->qfile_array->exhausted_count = 0; |
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) |
|
+ { |
|
+ temp_fd = query_cbk_args->qfile_array->fd_array[count]; |
|
+ temp_fd = open(local_brick->qfile_path, O_RDONLY, |
|
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); |
|
+ if (temp_fd < 0) { |
|
+ gf_msg("tier", GF_LOG_ERROR, errno, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to open " |
|
+ "%s to the query file", |
|
+ local_brick->qfile_path); |
|
+ query_cbk_args->qfile_array->exhausted_count++; |
|
+ } |
|
+ query_cbk_args->qfile_array->fd_array[count] = temp_fd; |
|
+ count++; |
|
+ } |
|
+ |
|
+ /* Moving the query file index to the next, so that we won't the same |
|
+ * query file every cycle as the first one */ |
|
+ query_cbk_args->qfile_array |
|
+ ->next_index = (query_cbk_args->is_promotion) |
|
+ ? tier_conf->last_promote_qfile_index |
|
+ : tier_conf->last_demote_qfile_index; |
|
+ shift_next_index(query_cbk_args->qfile_array); |
|
+ if (query_cbk_args->is_promotion) { |
|
+ tier_conf->last_promote_qfile_index = query_cbk_args->qfile_array |
|
+ ->next_index; |
|
+ } else { |
|
+ tier_conf->last_demote_qfile_index = query_cbk_args->qfile_array |
|
+ ->next_index; |
|
+ } |
|
+ |
|
+ /* Migrate files using query file list */ |
|
+ ret = tier_migrate_using_query_file((void *)query_cbk_args); |
|
+out: |
|
+ qfile_array_free(query_cbk_args->qfile_array); |
|
+ |
|
+ /* If there is an error rename all the query files to .err files |
|
+ * with a timestamp for better debugging */ |
|
+ if (ret) { |
|
+ struct tm tm = { |
|
+ 0, |
|
+ }; |
|
+ char time_str[128] = { |
|
+ 0, |
|
+ }; |
|
+ char query_file_path_err[PATH_MAX] = { |
|
+ 0, |
|
+ }; |
|
+ int32_t len = 0; |
|
+ |
|
+ /* Time format for error query files */ |
|
+ gmtime_r(¤t_time.tv_sec, &tm); |
|
+ strftime(time_str, sizeof(time_str), "%F-%T", &tm); |
|
+ |
|
+ list_for_each_entry_safe(local_brick, temp, comp->brick_list, list) |
|
+ { |
|
+ /* rename error qfile*/ |
|
+ len = snprintf(query_file_path_err, sizeof(query_file_path_err), |
|
+ "%s-%s.err", local_brick->qfile_path, time_str); |
|
+ if ((len >= 0) && (len < sizeof(query_file_path_err))) { |
|
+ if (sys_rename(local_brick->qfile_path, query_file_path_err) == |
|
+ -1) |
|
+ gf_msg_debug("tier", 0, |
|
+ "rename " |
|
+ "failed"); |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ query_cbk_args->qfile_array = NULL; |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_demote(migration_args_t *demotion_args) |
|
+{ |
|
+ query_cbk_args_t query_cbk_args; |
|
+ int ret = -1; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", demotion_args, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", demotion_args->this, out); |
|
+ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->brick_list, |
|
+ out); |
|
+ GF_VALIDATE_OR_GOTO(demotion_args->this->name, demotion_args->defrag, out); |
|
+ |
|
+ THIS = demotion_args->this; |
|
+ |
|
+ query_cbk_args.this = demotion_args->this; |
|
+ query_cbk_args.defrag = demotion_args->defrag; |
|
+ query_cbk_args.is_promotion = 0; |
|
+ |
|
+ /*Build the query file using bricklist*/ |
|
+ ret = tier_build_migration_qfile(demotion_args, &query_cbk_args, _gf_false); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+ /* Migrate files using the query file */ |
|
+ ret = tier_migrate_files_using_qfile(demotion_args, &query_cbk_args); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+out: |
|
+ demotion_args->return_value = ret; |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_promote(migration_args_t *promotion_args) |
|
+{ |
|
+ int ret = -1; |
|
+ query_cbk_args_t query_cbk_args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", promotion_args->this, out); |
|
+ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->brick_list, |
|
+ out); |
|
+ GF_VALIDATE_OR_GOTO(promotion_args->this->name, promotion_args->defrag, |
|
+ out); |
|
+ |
|
+ THIS = promotion_args->this; |
|
+ |
|
+ query_cbk_args.this = promotion_args->this; |
|
+ query_cbk_args.defrag = promotion_args->defrag; |
|
+ query_cbk_args.is_promotion = 1; |
|
+ |
|
+ /*Build the query file using bricklist*/ |
|
+ ret = tier_build_migration_qfile(promotion_args, &query_cbk_args, _gf_true); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+ /* Migrate files using the query file */ |
|
+ ret = tier_migrate_files_using_qfile(promotion_args, &query_cbk_args); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+out: |
|
+ promotion_args->return_value = ret; |
|
+ return ret; |
|
+} |
|
+ |
|
+/* |
|
+ * Command the CTR on a brick to compact the local database using an IPC |
|
+ */ |
|
+static int |
|
+tier_process_self_compact(tier_brick_list_t *local_brick, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ char *db_path = NULL; |
|
+ query_cbk_args_t *query_cbk_args = NULL; |
|
+ xlator_t *this = NULL; |
|
+ gfdb_conn_node_t *conn_node = NULL; |
|
+ dict_t *params_dict = NULL; |
|
+ dict_t *ctr_ipc_dict = NULL; |
|
+ gfdb_brick_info_t *gfdb_brick_info = args; |
|
+ |
|
+ /*Init of all the essentials*/ |
|
+ GF_VALIDATE_OR_GOTO("tier", gfdb_brick_info, out); |
|
+ query_cbk_args = gfdb_brick_info->_query_cbk_args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", query_cbk_args->this, out); |
|
+ this = query_cbk_args->this; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, gfdb_brick_info->_query_cbk_args, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->xlator, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, local_brick->brick_db_path, out); |
|
+ |
|
+ db_path = local_brick->brick_db_path; |
|
+ |
|
+ /*Preparing DB parameters before init_db i.e getting db connection*/ |
|
+ params_dict = dict_new(); |
|
+ if (!params_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "DB Params cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ SET_DB_PARAM_TO_DICT(this->name, params_dict, |
|
+ (char *)gfdb_methods.get_db_path_key(), db_path, ret, |
|
+ out); |
|
+ |
|
+ /*Get the db connection*/ |
|
+ conn_node = gfdb_methods.init_db((void *)params_dict, dht_tier_db_type); |
|
+ if (!conn_node) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "FATAL: Failed initializing db operations"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+ /*Preparing ctr_ipc_dict*/ |
|
+ ctr_ipc_dict = dict_new(); |
|
+ if (!ctr_ipc_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "ctr_ipc_dict cannot initialized"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_int32(ctr_ipc_dict, "compact_active", |
|
+ query_cbk_args->defrag->tier_conf.compact_active); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed to set %s " |
|
+ "to params dictionary", |
|
+ "compact_active"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_int32( |
|
+ ctr_ipc_dict, "compact_mode_switched", |
|
+ query_cbk_args->defrag->tier_conf.compact_mode_switched); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, LG_MSG_SET_PARAM_FAILED, |
|
+ "Failed to set %s " |
|
+ "to params dictionary", |
|
+ "compact_mode_switched"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, ctr_ipc_dict, GFDB_IPC_CTR_KEY, |
|
+ GFDB_IPC_CTR_SET_COMPACT_PRAGMA, ret, out); |
|
+ |
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Starting Compaction IPC"); |
|
+ |
|
+ ret = syncop_ipc(local_brick->xlator, GF_IPC_TARGET_CTR, ctr_ipc_dict, |
|
+ NULL); |
|
+ |
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Ending Compaction IPC"); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed compaction " |
|
+ "on db %s error %d", |
|
+ local_brick->brick_db_path, ret); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "SUCCESS: %s Compaction", local_brick->brick_name); |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ if (params_dict) { |
|
+ dict_unref(params_dict); |
|
+ params_dict = NULL; |
|
+ } |
|
+ |
|
+ if (ctr_ipc_dict) { |
|
+ dict_unref(ctr_ipc_dict); |
|
+ ctr_ipc_dict = NULL; |
|
+ } |
|
+ |
|
+ gfdb_methods.fini_db(conn_node); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* |
|
+ * This is the call back function for each brick from hot/cold bricklist. |
|
+ * It determines the database type on each brick and calls the corresponding |
|
+ * function to prepare the compaction IPC. |
|
+ */ |
|
+static int |
|
+tier_compact_db_brick(tier_brick_list_t *local_brick, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick, out); |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", local_brick->xlator, out); |
|
+ |
|
+ ret = tier_process_self_compact(local_brick, args); |
|
+ if (ret) { |
|
+ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Brick %s did not compact", local_brick->brick_name); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_send_compact(migration_args_t *args, query_cbk_args_t *query_cbk_args) |
|
+{ |
|
+ gfdb_time_t current_time; |
|
+ gfdb_brick_info_t gfdb_brick_info; |
|
+ gfdb_time_t time_in_past; |
|
+ int ret = -1; |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ |
|
+ time_in_past.tv_sec = args->freq_time; |
|
+ time_in_past.tv_usec = 0; |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_msg(args->this->name, GF_LOG_ERROR, errno, |
|
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED, "Failed to get current time"); |
|
+ goto out; |
|
+ } |
|
+ time_in_past.tv_sec = current_time.tv_sec - time_in_past.tv_sec; |
|
+ |
|
+ /* The migration daemon may run a varying numberof usec after the sleep |
|
+ call triggers. A file may be registered in CTR some number of usec X |
|
+ after the daemon started and missed in the subsequent cycle if the |
|
+ daemon starts Y usec after the period in seconds where Y>X. Normalize |
|
+ away this problem by always setting usec to 0. */ |
|
+ time_in_past.tv_usec = 0; |
|
+ |
|
+ gfdb_brick_info.time_stamp = &time_in_past; |
|
+ |
|
+ /* This is meant to say we are always compacting at this point */ |
|
+ /* We simply borrow the promotion flag to do this */ |
|
+ gfdb_brick_info._gfdb_promote = 1; |
|
+ |
|
+ gfdb_brick_info._query_cbk_args = query_cbk_args; |
|
+ |
|
+ list_for_each_entry(local_brick, args->brick_list, list) |
|
+ { |
|
+ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Start compaction for %s", local_brick->brick_name); |
|
+ |
|
+ ret = tier_compact_db_brick(local_brick, &gfdb_brick_info); |
|
+ if (ret) { |
|
+ gf_msg(args->this->name, GF_LOG_ERROR, 0, |
|
+ DHT_MSG_BRICK_QUERY_FAILED, "Brick %s compaction failed\n", |
|
+ local_brick->brick_db_path); |
|
+ } |
|
+ |
|
+ gf_msg(args->this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "End compaction for %s", local_brick->brick_name); |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_compact(void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ query_cbk_args_t query_cbk_args; |
|
+ migration_args_t *compaction_args = args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", compaction_args->this, out); |
|
+ GF_VALIDATE_OR_GOTO(compaction_args->this->name, |
|
+ compaction_args->brick_list, out); |
|
+ GF_VALIDATE_OR_GOTO(compaction_args->this->name, compaction_args->defrag, |
|
+ out); |
|
+ |
|
+ THIS = compaction_args->this; |
|
+ |
|
+ query_cbk_args.this = compaction_args->this; |
|
+ query_cbk_args.defrag = compaction_args->defrag; |
|
+ query_cbk_args.is_compaction = 1; |
|
+ |
|
+ /* Send the compaction pragma out to all the bricks on the bricklist. */ |
|
+ /* tier_get_bricklist ensures all bricks on the list are local to */ |
|
+ /* this node. */ |
|
+ ret = tier_send_compact(compaction_args, &query_cbk_args); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ compaction_args->return_value = ret; |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_get_bricklist(xlator_t *xl, struct list_head *local_bricklist_head) |
|
+{ |
|
+ xlator_list_t *child = NULL; |
|
+ char *rv = NULL; |
|
+ char *rh = NULL; |
|
+ char *brickname = NULL; |
|
+ char db_name[PATH_MAX] = ""; |
|
+ int ret = 0; |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ int32_t len = 0; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", xl, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", local_bricklist_head, out); |
|
+ |
|
+ /* |
|
+ * This function obtains remote subvolumes and filters out only |
|
+ * those running on the same node as the tier daemon. |
|
+ */ |
|
+ if (strcmp(xl->type, "protocol/client") == 0) { |
|
+ ret = dict_get_str(xl->options, "remote-host", &rh); |
|
+ if (ret < 0) |
|
+ goto out; |
|
+ |
|
+ if (gf_is_local_addr(rh)) { |
|
+ local_brick = GF_CALLOC(1, sizeof(tier_brick_list_t), |
|
+ gf_tier_mt_bricklist_t); |
|
+ if (!local_brick) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_get_str(xl->options, "remote-subvolume", &rv); |
|
+ if (ret < 0) |
|
+ goto out; |
|
+ |
|
+ brickname = strrchr(rv, '/') + 1; |
|
+ snprintf(db_name, sizeof(db_name), "%s.db", brickname); |
|
+ |
|
+ local_brick->brick_db_path = GF_MALLOC(PATH_MAX, gf_common_mt_char); |
|
+ if (!local_brick->brick_db_path) { |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Failed to allocate memory for" |
|
+ " bricklist."); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ len = snprintf(local_brick->brick_db_path, PATH_MAX, "%s/%s/%s", rv, |
|
+ GF_HIDDEN_PATH, db_name); |
|
+ if ((len < 0) || (len >= PATH_MAX)) { |
|
+ gf_msg("tier", GF_LOG_ERROR, EINVAL, DHT_MSG_LOG_TIER_STATUS, |
|
+ "DB path too long"); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ local_brick->xlator = xl; |
|
+ |
|
+ snprintf(local_brick->brick_name, NAME_MAX, "%s", brickname); |
|
+ |
|
+ list_add_tail(&(local_brick->list), local_bricklist_head); |
|
+ |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ |
|
+ for (child = xl->children; child; child = child->next) { |
|
+ ret = tier_get_bricklist(child->xlator, local_bricklist_head); |
|
+ if (ret) { |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ |
|
+ if (ret) { |
|
+ if (local_brick) { |
|
+ GF_FREE(local_brick->brick_db_path); |
|
+ } |
|
+ GF_FREE(local_brick); |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_get_freq_demote(gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ if ((tier_conf->mode == TIER_MODE_WM) && |
|
+ (tier_conf->watermark_last == TIER_WM_HI)) |
|
+ return DEFAULT_DEMOTE_DEGRADED; |
|
+ else |
|
+ return tier_conf->tier_demote_frequency; |
|
+} |
|
+ |
|
+int |
|
+tier_get_freq_promote(gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ return tier_conf->tier_promote_frequency; |
|
+} |
|
+ |
|
+int |
|
+tier_get_freq_compact_hot(gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ return tier_conf->tier_compact_hot_frequency; |
|
+} |
|
+ |
|
+int |
|
+tier_get_freq_compact_cold(gf_tier_conf_t *tier_conf) |
|
+{ |
|
+ return tier_conf->tier_compact_cold_frequency; |
|
+} |
|
+ |
|
+static int |
|
+tier_check_demote(gfdb_time_t current_time, int freq) |
|
+{ |
|
+ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; |
|
+} |
|
+ |
|
+static gf_boolean_t |
|
+tier_check_promote(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, |
|
+ int freq) |
|
+{ |
|
+ if ((tier_conf->mode == TIER_MODE_WM) && |
|
+ (tier_conf->watermark_last == TIER_WM_HI)) |
|
+ return _gf_false; |
|
+ |
|
+ else |
|
+ return ((current_time.tv_sec % freq) == 0) ? _gf_true : _gf_false; |
|
+} |
|
+ |
|
+static gf_boolean_t |
|
+tier_check_compact(gf_tier_conf_t *tier_conf, gfdb_time_t current_time, |
|
+ int freq_compact) |
|
+{ |
|
+ if (!(tier_conf->compact_active || tier_conf->compact_mode_switched)) |
|
+ return _gf_false; |
|
+ |
|
+ return ((current_time.tv_sec % freq_compact) == 0) ? _gf_true : _gf_false; |
|
+} |
|
+ |
|
+void |
|
+clear_bricklist(struct list_head *brick_list) |
|
+{ |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ tier_brick_list_t *temp = NULL; |
|
+ |
|
+ if (list_empty(brick_list)) { |
|
+ return; |
|
+ } |
|
+ |
|
+ list_for_each_entry_safe(local_brick, temp, brick_list, list) |
|
+ { |
|
+ list_del(&local_brick->list); |
|
+ GF_FREE(local_brick->brick_db_path); |
|
+ GF_FREE(local_brick); |
|
+ } |
|
+} |
|
+ |
|
+static void |
|
+set_brick_list_qpath(struct list_head *brick_list, gf_boolean_t is_cold) |
|
+{ |
|
+ tier_brick_list_t *local_brick = NULL; |
|
+ int i = 0; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", brick_list, out); |
|
+ |
|
+ list_for_each_entry(local_brick, brick_list, list) |
|
+ { |
|
+ /* Construct query file path for this brick |
|
+ * i.e |
|
+ * /var/run/gluster/xlator_name/ |
|
+ * {promote/demote}-brickname-indexinbricklist |
|
+ * So that no two query files will have same path even |
|
+ * bricks have the same name |
|
+ * */ |
|
+ snprintf(local_brick->qfile_path, PATH_MAX, "%s-%s-%d", |
|
+ GET_QFILE_PATH(is_cold), local_brick->brick_name, i); |
|
+ i++; |
|
+ } |
|
+out: |
|
+ return; |
|
+} |
|
+ |
|
+static int |
|
+tier_prepare_compact(migration_args_t *args, gfdb_time_t current_time) |
|
+{ |
|
+ xlator_t *this = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ gf_boolean_t is_hot_tier = args->is_hot_tier; |
|
+ int freq = 0; |
|
+ int ret = -1; |
|
+ const char *tier_type = is_hot_tier ? "hot" : "cold"; |
|
+ |
|
+ this = args->this; |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ defrag = conf->defrag; |
|
+ |
|
+ tier_conf = &defrag->tier_conf; |
|
+ |
|
+ freq = is_hot_tier ? tier_get_freq_compact_hot(tier_conf) |
|
+ : tier_get_freq_compact_cold(tier_conf); |
|
+ |
|
+ defrag->tier_conf.compact_mode_switched = |
|
+ is_hot_tier ? defrag->tier_conf.compact_mode_switched_hot |
|
+ : defrag->tier_conf.compact_mode_switched_cold; |
|
+ |
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Compact mode %i", defrag->tier_conf.compact_mode_switched); |
|
+ |
|
+ if (tier_check_compact(tier_conf, current_time, freq)) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Start compaction on %s tier", tier_type); |
|
+ |
|
+ args->freq_time = freq; |
|
+ ret = tier_compact(args); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Compaction failed on " |
|
+ "%s tier", |
|
+ tier_type); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "End compaction on %s tier", tier_type); |
|
+ |
|
+ if (is_hot_tier) { |
|
+ defrag->tier_conf.compact_mode_switched_hot = _gf_false; |
|
+ } else { |
|
+ defrag->tier_conf.compact_mode_switched_cold = _gf_false; |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+static int |
|
+tier_get_wm_interval(tier_mode_t mode, tier_watermark_op_t wm) |
|
+{ |
|
+ if (mode == TIER_MODE_WM && wm == TIER_WM_HI) |
|
+ return WM_INTERVAL_EMERG; |
|
+ |
|
+ return WM_INTERVAL; |
|
+} |
|
+ |
|
+/* |
|
+ * Main tiering loop. This is called from the promotion and the |
|
+ * demotion threads spawned in tier_start(). |
|
+ * |
|
+ * Every second, wake from sleep to perform tasks. |
|
+ * 1. Check trigger to migrate data. |
|
+ * 2. Check for state changes (pause, unpause, stop). |
|
+ */ |
|
+static void * |
|
+tier_run(void *in_args) |
|
+{ |
|
+ dht_conf_t *conf = NULL; |
|
+ gfdb_time_t current_time = {0}; |
|
+ int freq = 0; |
|
+ int ret = 0; |
|
+ xlator_t *any = NULL; |
|
+ xlator_t *xlator = NULL; |
|
+ gf_tier_conf_t *tier_conf = NULL; |
|
+ loc_t root_loc = {0}; |
|
+ int check_watermark = 0; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ xlator_t *this = NULL; |
|
+ migration_args_t *args = in_args; |
|
+ GF_VALIDATE_OR_GOTO("tier", args, out); |
|
+ GF_VALIDATE_OR_GOTO("tier", args->brick_list, out); |
|
+ |
|
+ this = args->this; |
|
+ GF_VALIDATE_OR_GOTO("tier", this, out); |
|
+ |
|
+ conf = this->private; |
|
+ GF_VALIDATE_OR_GOTO("tier", conf, out); |
|
+ |
|
+ defrag = conf->defrag; |
|
+ GF_VALIDATE_OR_GOTO("tier", defrag, out); |
|
+ |
|
+ if (list_empty(args->brick_list)) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Brick list for tier is empty. Exiting."); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED; |
|
+ tier_conf = &defrag->tier_conf; |
|
+ |
|
+ dht_build_root_loc(defrag->root_inode, &root_loc); |
|
+ |
|
+ while (1) { |
|
+ /* |
|
+ * Check if a graph switch occurred. If so, stop migration |
|
+ * thread. It will need to be restarted manually. |
|
+ */ |
|
+ any = THIS->ctx->active->first; |
|
+ xlator = xlator_search_by_name(any, this->name); |
|
+ |
|
+ if (xlator != this) { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Detected graph switch. Exiting migration " |
|
+ "daemon."); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ gf_defrag_check_pause_tier(tier_conf); |
|
+ |
|
+ sleep(1); |
|
+ |
|
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) { |
|
+ ret = 1; |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "defrag->defrag_status != " |
|
+ "GF_DEFRAG_STATUS_STARTED"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER || |
|
+ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) { |
|
+ ret = 0; |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE; |
|
+ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "defrag->defrag_cmd == " |
|
+ "GF_DEFRAG_CMD_START_DETACH_TIER"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (gf_defrag_get_pause_state(&defrag->tier_conf) != TIER_RUNNING) |
|
+ continue; |
|
+ |
|
+ /* To have proper synchronization amongst all |
|
+ * brick holding nodes, so that promotion and demotions |
|
+ * start atomically w.r.t promotion/demotion frequency |
|
+ * period, all nodes should have their system time |
|
+ * in-sync with each other either manually set or |
|
+ * using a NTP server*/ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, errno, |
|
+ DHT_MSG_SYS_CALL_GET_TIME_FAILED, |
|
+ "Failed to get current time"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ check_watermark++; |
|
+ |
|
+ /* emergency demotion requires frequent watermark monitoring */ |
|
+ if (check_watermark >= |
|
+ tier_get_wm_interval(tier_conf->mode, tier_conf->watermark_last)) { |
|
+ check_watermark = 0; |
|
+ if (tier_conf->mode == TIER_MODE_WM) { |
|
+ ret = tier_get_fs_stat(this, &root_loc); |
|
+ if (ret != 0) { |
|
+ continue; |
|
+ } |
|
+ ret = tier_check_watermark(this); |
|
+ if (ret != 0) { |
|
+ gf_msg(this->name, GF_LOG_CRITICAL, errno, |
|
+ DHT_MSG_LOG_TIER_ERROR, "Failed to get watermark"); |
|
+ continue; |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ if (args->is_promotion) { |
|
+ freq = tier_get_freq_promote(tier_conf); |
|
+ |
|
+ if (tier_check_promote(tier_conf, current_time, freq)) { |
|
+ args->freq_time = freq; |
|
+ ret = tier_promote(args); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Promotion failed"); |
|
+ } |
|
+ } |
|
+ } else if (args->is_compaction) { |
|
+ tier_prepare_compact(args, current_time); |
|
+ } else { |
|
+ freq = tier_get_freq_demote(tier_conf); |
|
+ |
|
+ if (tier_check_demote(current_time, freq)) { |
|
+ args->freq_time = freq; |
|
+ ret = tier_demote(args); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Demotion failed"); |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ /* Check the statfs immediately after the processing threads |
|
+ return */ |
|
+ check_watermark = WM_INTERVAL; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ |
|
+ args->return_value = ret; |
|
+ |
|
+ return NULL; |
|
+} |
|
+ |
|
+int |
|
+tier_start(xlator_t *this, gf_defrag_info_t *defrag) |
|
+{ |
|
+ pthread_t promote_thread; |
|
+ pthread_t demote_thread; |
|
+ pthread_t hot_compact_thread; |
|
+ pthread_t cold_compact_thread; |
|
+ int ret = -1; |
|
+ struct list_head bricklist_hot = {0}; |
|
+ struct list_head bricklist_cold = {0}; |
|
+ migration_args_t promotion_args = {0}; |
|
+ migration_args_t demotion_args = {0}; |
|
+ migration_args_t hot_compaction_args = {0}; |
|
+ migration_args_t cold_compaction_args = {0}; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ INIT_LIST_HEAD((&bricklist_hot)); |
|
+ INIT_LIST_HEAD((&bricklist_cold)); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ tier_get_bricklist(conf->subvolumes[1], &bricklist_hot); |
|
+ set_brick_list_qpath(&bricklist_hot, _gf_false); |
|
+ |
|
+ demotion_args.this = this; |
|
+ demotion_args.brick_list = &bricklist_hot; |
|
+ demotion_args.defrag = defrag; |
|
+ demotion_args.is_promotion = _gf_false; |
|
+ demotion_args.is_compaction = _gf_false; |
|
+ |
|
+ ret = gf_thread_create(&demote_thread, NULL, &tier_run, &demotion_args, |
|
+ "tierdem"); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to start demotion thread."); |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; |
|
+ goto cleanup; |
|
+ } |
|
+ |
|
+ tier_get_bricklist(conf->subvolumes[0], &bricklist_cold); |
|
+ set_brick_list_qpath(&bricklist_cold, _gf_true); |
|
+ |
|
+ promotion_args.this = this; |
|
+ promotion_args.brick_list = &bricklist_cold; |
|
+ promotion_args.defrag = defrag; |
|
+ promotion_args.is_promotion = _gf_true; |
|
+ |
|
+ ret = gf_thread_create(&promote_thread, NULL, &tier_run, &promotion_args, |
|
+ "tierpro"); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to start promotion thread."); |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; |
|
+ goto waitforspawned; |
|
+ } |
|
+ |
|
+ hot_compaction_args.this = this; |
|
+ hot_compaction_args.brick_list = &bricklist_hot; |
|
+ hot_compaction_args.defrag = defrag; |
|
+ hot_compaction_args.is_promotion = _gf_false; |
|
+ hot_compaction_args.is_compaction = _gf_true; |
|
+ hot_compaction_args.is_hot_tier = _gf_true; |
|
+ |
|
+ ret = gf_thread_create(&hot_compact_thread, NULL, &tier_run, |
|
+ &hot_compaction_args, "tierhcom"); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to start compaction thread."); |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; |
|
+ goto waitforspawnedpromote; |
|
+ } |
|
+ |
|
+ cold_compaction_args.this = this; |
|
+ cold_compaction_args.brick_list = &bricklist_cold; |
|
+ cold_compaction_args.defrag = defrag; |
|
+ cold_compaction_args.is_promotion = _gf_false; |
|
+ cold_compaction_args.is_compaction = _gf_true; |
|
+ cold_compaction_args.is_hot_tier = _gf_false; |
|
+ |
|
+ ret = gf_thread_create(&cold_compact_thread, NULL, &tier_run, |
|
+ &cold_compaction_args, "tierccom"); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Failed to start compaction thread."); |
|
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED; |
|
+ goto waitforspawnedhotcompact; |
|
+ } |
|
+ pthread_join(cold_compact_thread, NULL); |
|
+ |
|
+waitforspawnedhotcompact: |
|
+ pthread_join(hot_compact_thread, NULL); |
|
+ |
|
+waitforspawnedpromote: |
|
+ pthread_join(promote_thread, NULL); |
|
+ |
|
+waitforspawned: |
|
+ pthread_join(demote_thread, NULL); |
|
+ |
|
+cleanup: |
|
+ clear_bricklist(&bricklist_cold); |
|
+ clear_bricklist(&bricklist_hot); |
|
+ return ret; |
|
+} |
|
+ |
|
+int32_t |
|
+tier_migration_needed(xlator_t *this) |
|
+{ |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int ret = 0; |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO(this->name, conf, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, conf->defrag, out); |
|
+ |
|
+ defrag = conf->defrag; |
|
+ |
|
+ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || |
|
+ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) |
|
+ ret = 1; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+int32_t |
|
+tier_migration_get_dst(xlator_t *this, dht_local_t *local) |
|
+{ |
|
+ dht_conf_t *conf = NULL; |
|
+ int32_t ret = -1; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ defrag = conf->defrag; |
|
+ |
|
+ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) { |
|
+ local->rebalance.target_node = conf->subvolumes[0]; |
|
+ |
|
+ } else if (conf->subvolumes[0] == local->cached_subvol) |
|
+ local->rebalance.target_node = conf->subvolumes[1]; |
|
+ else |
|
+ local->rebalance.target_node = conf->subvolumes[0]; |
|
+ |
|
+ if (local->rebalance.target_node) |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+xlator_t * |
|
+tier_search(xlator_t *this, dht_layout_t *layout, const char *name) |
|
+{ |
|
+ xlator_t *subvol = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ subvol = TIER_HASHED_SUBVOL; |
|
+ |
|
+out: |
|
+ return subvol; |
|
+} |
|
+ |
|
+static int |
|
+tier_load_externals(xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ char *libpathfull = (LIBDIR "/libgfdb.so.0"); |
|
+ get_gfdb_methods_t get_gfdb_methods; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("this", this, out); |
|
+ |
|
+ libhandle = dlopen(libpathfull, RTLD_NOW); |
|
+ if (!libhandle) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Error loading libgfdb.so %s\n", dlerror()); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ get_gfdb_methods = dlsym(libhandle, "get_gfdb_methods"); |
|
+ if (!get_gfdb_methods) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Error loading get_gfdb_methods()"); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ get_gfdb_methods(&gfdb_methods); |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ if (ret && libhandle) |
|
+ dlclose(libhandle); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+static tier_mode_t |
|
+tier_validate_mode(char *mode) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ if (strcmp(mode, "test") == 0) { |
|
+ ret = TIER_MODE_TEST; |
|
+ } else { |
|
+ ret = TIER_MODE_WM; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+static gf_boolean_t |
|
+tier_validate_compact_mode(char *mode) |
|
+{ |
|
+ gf_boolean_t ret = _gf_false; |
|
+ |
|
+ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "tier_validate_compact_mode: mode = %s", mode); |
|
+ |
|
+ if (!strcmp(mode, "on")) { |
|
+ ret = _gf_true; |
|
+ } else { |
|
+ ret = _gf_false; |
|
+ } |
|
+ |
|
+ gf_msg("tier", GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "tier_validate_compact_mode: ret = %i", ret); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_init_methods(xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ dht_conf_t *conf = NULL; |
|
+ dht_methods_t *methods = NULL; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("tier", this, err); |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ methods = &(conf->methods); |
|
+ |
|
+ methods->migration_get_dst_subvol = tier_migration_get_dst; |
|
+ methods->migration_other = tier_start; |
|
+ methods->migration_needed = tier_migration_needed; |
|
+ methods->layout_search = tier_search; |
|
+ |
|
+ ret = 0; |
|
+err: |
|
+ return ret; |
|
+} |
|
+ |
|
+static void |
|
+tier_save_vol_name(xlator_t *this) |
|
+{ |
|
+ dht_conf_t *conf = NULL; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ char *suffix = NULL; |
|
+ int name_len = 0; |
|
+ |
|
+ conf = this->private; |
|
+ defrag = conf->defrag; |
|
+ |
|
+ suffix = strstr(this->name, "-tier-dht"); |
|
+ |
|
+ if (suffix) |
|
+ name_len = suffix - this->name; |
|
+ else |
|
+ name_len = strlen(this->name); |
|
+ |
|
+ if (name_len > GD_VOLUME_NAME_MAX) |
|
+ name_len = GD_VOLUME_NAME_MAX; |
|
+ |
|
+ strncpy(defrag->tier_conf.volname, this->name, name_len); |
|
+ defrag->tier_conf.volname[name_len] = 0; |
|
+} |
|
+ |
|
+int |
|
+tier_init(xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ int freq = 0; |
|
+ int maxsize = 0; |
|
+ dht_conf_t *conf = NULL; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ char *voldir = NULL; |
|
+ char *mode = NULL; |
|
+ char *paused = NULL; |
|
+ tier_mode_t tier_mode = DEFAULT_TIER_MODE; |
|
+ gf_boolean_t compact_mode = _gf_false; |
|
+ |
|
+ ret = dht_init(this); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "tier_init failed"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ ret = tier_init_methods(this); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "tier_init_methods failed"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (conf->subvolume_cnt != 2) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Invalid number of subvolumes %d", conf->subvolume_cnt); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* if instantiated from the client side, initialization is complete. */ |
|
+ if (!conf->defrag) { |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* if instantiated from the server side, load db libraries */ |
|
+ ret = tier_load_externals(this); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "Could not load externals. Aborting"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ defrag = conf->defrag; |
|
+ |
|
+ defrag->tier_conf.last_demote_qfile_index = 0; |
|
+ defrag->tier_conf.last_promote_qfile_index = 0; |
|
+ |
|
+ defrag->tier_conf.is_tier = 1; |
|
+ defrag->this = this; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-max-promote-file-size", &maxsize); |
|
+ if (ret) { |
|
+ maxsize = 0; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.tier_max_promote_size = maxsize; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-promote-frequency", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_PROMOTE_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.tier_promote_frequency = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-demote-frequency", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_DEMOTE_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.tier_demote_frequency = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-hot-compact-frequency", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_HOT_COMPACT_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.tier_compact_hot_frequency = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-cold-compact-frequency", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_COLD_COMPACT_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.tier_compact_cold_frequency = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "watermark-hi", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_WM_HI; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.watermark_hi = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "watermark-low", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_WM_LOW; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.watermark_low = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "write-freq-threshold", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_WRITE_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->write_freq_threshold = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "read-freq-threshold", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_READ_FREQ_SEC; |
|
+ } |
|
+ |
|
+ defrag->read_freq_threshold = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-max-mb", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_TIER_MAX_MIGRATE_MB; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.max_migrate_bytes = (uint64_t)freq * 1024 * 1024; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-max-files", &freq); |
|
+ if (ret) { |
|
+ freq = DEFAULT_TIER_MAX_MIGRATE_FILES; |
|
+ } |
|
+ |
|
+ defrag->tier_conf.max_migrate_files = freq; |
|
+ |
|
+ ret = dict_get_int32(this->options, "tier-query-limit", |
|
+ &(defrag->tier_conf.query_limit)); |
|
+ if (ret) { |
|
+ defrag->tier_conf.query_limit = DEFAULT_TIER_QUERY_LIMIT; |
|
+ } |
|
+ |
|
+ ret = dict_get_str(this->options, "tier-compact", &mode); |
|
+ |
|
+ if (ret) { |
|
+ defrag->tier_conf.compact_active = DEFAULT_COMP_MODE; |
|
+ } else { |
|
+ compact_mode = tier_validate_compact_mode(mode); |
|
+ /* If compaction is now active, we need to inform the bricks on |
|
+ the hot and cold tier of this. See dht-common.h for more. */ |
|
+ defrag->tier_conf.compact_active = compact_mode; |
|
+ if (compact_mode) { |
|
+ defrag->tier_conf.compact_mode_switched_hot = _gf_true; |
|
+ defrag->tier_conf.compact_mode_switched_cold = _gf_true; |
|
+ } |
|
+ } |
|
+ |
|
+ ret = dict_get_str(this->options, "tier-mode", &mode); |
|
+ if (ret) { |
|
+ defrag->tier_conf.mode = DEFAULT_TIER_MODE; |
|
+ } else { |
|
+ tier_mode = tier_validate_mode(mode); |
|
+ defrag->tier_conf.mode = tier_mode; |
|
+ } |
|
+ |
|
+ pthread_mutex_init(&defrag->tier_conf.pause_mutex, 0); |
|
+ |
|
+ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_RUNNING); |
|
+ |
|
+ ret = dict_get_str(this->options, "tier-pause", &paused); |
|
+ |
|
+ if (paused && strcmp(paused, "on") == 0) |
|
+ gf_defrag_set_pause_state(&defrag->tier_conf, TIER_REQUEST_PAUSE); |
|
+ |
|
+ ret = gf_asprintf(&voldir, "%s/%s", DEFAULT_VAR_RUN_DIRECTORY, this->name); |
|
+ if (ret < 0) |
|
+ goto out; |
|
+ |
|
+ ret = mkdir_p(voldir, 0777, _gf_true); |
|
+ if (ret == -1 && errno != EEXIST) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "tier_init failed"); |
|
+ |
|
+ GF_FREE(voldir); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ GF_FREE(voldir); |
|
+ |
|
+ ret = gf_asprintf(&promotion_qfile, "%s/%s/promote", |
|
+ DEFAULT_VAR_RUN_DIRECTORY, this->name); |
|
+ if (ret < 0) |
|
+ goto out; |
|
+ |
|
+ ret = gf_asprintf(&demotion_qfile, "%s/%s/demote", |
|
+ DEFAULT_VAR_RUN_DIRECTORY, this->name); |
|
+ if (ret < 0) { |
|
+ GF_FREE(promotion_qfile); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "Promote/demote frequency %d/%d " |
|
+ "Write/Read freq thresholds %d/%d", |
|
+ defrag->tier_conf.tier_promote_frequency, |
|
+ defrag->tier_conf.tier_demote_frequency, |
|
+ defrag->write_freq_threshold, defrag->read_freq_threshold); |
|
+ |
|
+ tier_save_vol_name(this); |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_cli_pause_done(int op_ret, call_frame_t *sync_frame, void *data) |
|
+{ |
|
+ gf_msg("tier", GF_LOG_INFO, 0, DHT_MSG_TIER_PAUSED, |
|
+ "Migrate file paused with op_ret %d", op_ret); |
|
+ |
|
+ return op_ret; |
|
+} |
|
+ |
|
+int |
|
+tier_cli_pause(void *data) |
|
+{ |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ xlator_t *this = NULL; |
|
+ dht_conf_t *conf = NULL; |
|
+ int ret = -1; |
|
+ |
|
+ this = data; |
|
+ |
|
+ conf = this->private; |
|
+ GF_VALIDATE_OR_GOTO(this->name, conf, exit); |
|
+ |
|
+ defrag = conf->defrag; |
|
+ GF_VALIDATE_OR_GOTO(this->name, defrag, exit); |
|
+ |
|
+ gf_defrag_pause_tier(this, defrag); |
|
+ |
|
+ ret = 0; |
|
+exit: |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+tier_reconfigure(xlator_t *this, dict_t *options) |
|
+{ |
|
+ dht_conf_t *conf = NULL; |
|
+ gf_defrag_info_t *defrag = NULL; |
|
+ char *mode = NULL; |
|
+ int migrate_mb = 0; |
|
+ gf_boolean_t req_pause = _gf_false; |
|
+ int ret = 0; |
|
+ call_frame_t *frame = NULL; |
|
+ gf_boolean_t last_compact_setting = _gf_false; |
|
+ |
|
+ conf = this->private; |
|
+ |
|
+ if (conf->defrag) { |
|
+ defrag = conf->defrag; |
|
+ GF_OPTION_RECONF("tier-max-promote-file-size", |
|
+ defrag->tier_conf.tier_max_promote_size, options, |
|
+ int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-promote-frequency", |
|
+ defrag->tier_conf.tier_promote_frequency, options, |
|
+ int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-demote-frequency", |
|
+ defrag->tier_conf.tier_demote_frequency, options, |
|
+ int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("write-freq-threshold", defrag->write_freq_threshold, |
|
+ options, int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("read-freq-threshold", defrag->read_freq_threshold, |
|
+ options, int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("watermark-hi", defrag->tier_conf.watermark_hi, |
|
+ options, int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("watermark-low", defrag->tier_conf.watermark_low, |
|
+ options, int32, out); |
|
+ |
|
+ last_compact_setting = defrag->tier_conf.compact_active; |
|
+ |
|
+ GF_OPTION_RECONF("tier-compact", defrag->tier_conf.compact_active, |
|
+ options, bool, out); |
|
+ |
|
+ if (last_compact_setting != defrag->tier_conf.compact_active) { |
|
+ defrag->tier_conf.compact_mode_switched_hot = _gf_true; |
|
+ defrag->tier_conf.compact_mode_switched_cold = _gf_true; |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LOG_TIER_STATUS, |
|
+ "compact mode switched"); |
|
+ } |
|
+ |
|
+ GF_OPTION_RECONF("tier-hot-compact-frequency", |
|
+ defrag->tier_conf.tier_compact_hot_frequency, options, |
|
+ int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-cold-compact-frequency", |
|
+ defrag->tier_conf.tier_compact_cold_frequency, options, |
|
+ int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-mode", mode, options, str, out); |
|
+ defrag->tier_conf.mode = tier_validate_mode(mode); |
|
+ |
|
+ GF_OPTION_RECONF("tier-max-mb", migrate_mb, options, int32, out); |
|
+ defrag->tier_conf.max_migrate_bytes = (uint64_t)migrate_mb * 1024 * |
|
+ 1024; |
|
+ |
|
+ GF_OPTION_RECONF("tier-max-files", defrag->tier_conf.max_migrate_files, |
|
+ options, int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-query-limit", defrag->tier_conf.query_limit, |
|
+ options, int32, out); |
|
+ |
|
+ GF_OPTION_RECONF("tier-pause", req_pause, options, bool, out); |
|
+ |
|
+ if (req_pause == _gf_true) { |
|
+ frame = create_frame(this, this->ctx->pool); |
|
+ if (!frame) |
|
+ goto out; |
|
+ |
|
+ frame->root->pid = GF_CLIENT_PID_DEFRAG; |
|
+ |
|
+ ret = synctask_new(this->ctx->env, tier_cli_pause, |
|
+ tier_cli_pause_done, frame, this); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "pause tier failed on reconfigure"); |
|
+ } |
|
+ } else { |
|
+ ret = gf_defrag_resume_tier(this, defrag); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LOG_TIER_ERROR, |
|
+ "resume tier failed on reconfigure"); |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ return dht_reconfigure(this, options); |
|
+} |
|
+ |
|
+void |
|
+tier_fini(xlator_t *this) |
|
+{ |
|
+ if (libhandle) |
|
+ dlclose(libhandle); |
|
+ |
|
+ GF_FREE(demotion_qfile); |
|
+ GF_FREE(promotion_qfile); |
|
+ |
|
+ dht_fini(this); |
|
+} |
|
+ |
|
+struct xlator_fops fops = { |
|
+ |
|
+ .lookup = dht_lookup, |
|
+ .create = tier_create, |
|
+ .mknod = dht_mknod, |
|
+ |
|
+ .open = dht_open, |
|
+ .statfs = tier_statfs, |
|
+ .opendir = dht_opendir, |
|
+ .readdir = tier_readdir, |
|
+ .readdirp = tier_readdirp, |
|
+ .fsyncdir = dht_fsyncdir, |
|
+ .symlink = dht_symlink, |
|
+ .unlink = tier_unlink, |
|
+ .link = tier_link, |
|
+ .mkdir = dht_mkdir, |
|
+ .rmdir = dht_rmdir, |
|
+ .rename = dht_rename, |
|
+ .entrylk = dht_entrylk, |
|
+ .fentrylk = dht_fentrylk, |
|
+ |
|
+ /* Inode read operations */ |
|
+ .stat = dht_stat, |
|
+ .fstat = dht_fstat, |
|
+ .access = dht_access, |
|
+ .readlink = dht_readlink, |
|
+ .getxattr = dht_getxattr, |
|
+ .fgetxattr = dht_fgetxattr, |
|
+ .readv = dht_readv, |
|
+ .flush = dht_flush, |
|
+ .fsync = dht_fsync, |
|
+ .inodelk = dht_inodelk, |
|
+ .finodelk = dht_finodelk, |
|
+ .lk = dht_lk, |
|
+ |
|
+ /* Inode write operations */ |
|
+ .fremovexattr = dht_fremovexattr, |
|
+ .removexattr = dht_removexattr, |
|
+ .setxattr = dht_setxattr, |
|
+ .fsetxattr = dht_fsetxattr, |
|
+ .truncate = dht_truncate, |
|
+ .ftruncate = dht_ftruncate, |
|
+ .writev = dht_writev, |
|
+ .xattrop = dht_xattrop, |
|
+ .fxattrop = dht_fxattrop, |
|
+ .setattr = dht_setattr, |
|
+ .fsetattr = dht_fsetattr, |
|
+ .fallocate = dht_fallocate, |
|
+ .discard = dht_discard, |
|
+ .zerofill = dht_zerofill, |
|
+}; |
|
+ |
|
+struct xlator_cbks cbks = {.release = dht_release, .forget = dht_forget}; |
|
+ |
|
+extern int32_t |
|
+mem_acct_init(xlator_t *this); |
|
+ |
|
+extern struct volume_options dht_options[]; |
|
+ |
|
+xlator_api_t xlator_api = { |
|
+ .init = tier_init, |
|
+ .fini = tier_fini, |
|
+ .notify = dht_notify, |
|
+ .reconfigure = tier_reconfigure, |
|
+ .mem_acct_init = mem_acct_init, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ |
|
+ .fops = &fops, |
|
+ .cbks = &cbks, |
|
+ .options = dht_options, |
|
+ .identifier = "tier", |
|
+ .category = GF_MAINTAINED, |
|
+}; |
|
+ |
|
diff --git a/xlators/cluster/dht/src/tier.h b/xlators/cluster/dht/src/tier.h |
|
new file mode 100644 |
|
index 0000000..a20b1db |
|
--- /dev/null |
|
+++ b/xlators/cluster/dht/src/tier.h |
|
@@ -0,0 +1,110 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef _TIER_H_ |
|
+#define _TIER_H_ |
|
+ |
|
+/******************************************************************************/ |
|
+/* This is from dht-rebalancer.c as we don't have dht-rebalancer.h */ |
|
+#include "dht-common.h" |
|
+#include <glusterfs/xlator.h> |
|
+#include <signal.h> |
|
+#include <fnmatch.h> |
|
+#include <signal.h> |
|
+ |
|
+/* |
|
+ * Size of timer wheel. We would not promote or demote less |
|
+ * frequently than this number. |
|
+ */ |
|
+#define TIMER_SECS 3600 |
|
+ |
|
+#include "gfdb_data_store.h" |
|
+#include <ctype.h> |
|
+#include <sys/stat.h> |
|
+ |
|
+#define PROMOTION_QFILE "promotequeryfile" |
|
+#define DEMOTION_QFILE "demotequeryfile" |
|
+ |
|
+#define TIER_HASHED_SUBVOL conf->subvolumes[0] |
|
+#define TIER_UNHASHED_SUBVOL conf->subvolumes[1] |
|
+ |
|
+#define GET_QFILE_PATH(is_promotion) \ |
|
+ (is_promotion) ? promotion_qfile : demotion_qfile |
|
+ |
|
+typedef struct tier_qfile_array { |
|
+ int *fd_array; |
|
+ ssize_t array_size; |
|
+ ssize_t next_index; |
|
+ /* Indicate the number of exhausted FDs*/ |
|
+ ssize_t exhausted_count; |
|
+} tier_qfile_array_t; |
|
+ |
|
+typedef struct _query_cbk_args { |
|
+ xlator_t *this; |
|
+ gf_defrag_info_t *defrag; |
|
+ /* This is write */ |
|
+ int query_fd; |
|
+ int is_promotion; |
|
+ int is_compaction; |
|
+ /* This is for read */ |
|
+ tier_qfile_array_t *qfile_array; |
|
+} query_cbk_args_t; |
|
+ |
|
+int |
|
+gf_run_tier(xlator_t *this, gf_defrag_info_t *defrag); |
|
+ |
|
+typedef struct gfdb_brick_info { |
|
+ gfdb_time_t *time_stamp; |
|
+ gf_boolean_t _gfdb_promote; |
|
+ query_cbk_args_t *_query_cbk_args; |
|
+} gfdb_brick_info_t; |
|
+ |
|
+typedef struct brick_list { |
|
+ xlator_t *xlator; |
|
+ char *brick_db_path; |
|
+ char brick_name[NAME_MAX]; |
|
+ char qfile_path[PATH_MAX]; |
|
+ struct list_head list; |
|
+} tier_brick_list_t; |
|
+ |
|
+typedef struct _dm_thread_args { |
|
+ xlator_t *this; |
|
+ gf_defrag_info_t *defrag; |
|
+ struct list_head *brick_list; |
|
+ int freq_time; |
|
+ int return_value; |
|
+ int is_promotion; |
|
+ int is_compaction; |
|
+ gf_boolean_t is_hot_tier; |
|
+} migration_args_t; |
|
+ |
|
+typedef enum tier_watermark_op_ { |
|
+ TIER_WM_NONE = 0, |
|
+ TIER_WM_LOW, |
|
+ TIER_WM_HI, |
|
+ TIER_WM_MID |
|
+} tier_watermark_op_t; |
|
+ |
|
+#define DEFAULT_PROMOTE_FREQ_SEC 120 |
|
+#define DEFAULT_DEMOTE_FREQ_SEC 120 |
|
+#define DEFAULT_HOT_COMPACT_FREQ_SEC 604800 |
|
+#define DEFAULT_COLD_COMPACT_FREQ_SEC 604800 |
|
+#define DEFAULT_DEMOTE_DEGRADED 1 |
|
+#define DEFAULT_WRITE_FREQ_SEC 0 |
|
+#define DEFAULT_READ_FREQ_SEC 0 |
|
+#define DEFAULT_WM_LOW 75 |
|
+#define DEFAULT_WM_HI 90 |
|
+#define DEFAULT_TIER_MODE TIER_MODE_TEST |
|
+#define DEFAULT_COMP_MODE _gf_true |
|
+#define DEFAULT_TIER_MAX_MIGRATE_MB 1000 |
|
+#define DEFAULT_TIER_MAX_MIGRATE_FILES 5000 |
|
+#define DEFAULT_TIER_QUERY_LIMIT 100 |
|
+ |
|
+#endif |
|
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am |
|
index 194634b..545c02b 100644 |
|
--- a/xlators/features/Makefile.am |
|
+++ b/xlators/features/Makefile.am |
|
@@ -5,6 +5,6 @@ endif |
|
SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \ |
|
compress changelog gfid-access snapview-client snapview-server trash \ |
|
shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \ |
|
- utime |
|
+ utime changetimerecorder |
|
|
|
CLEANFILES = |
|
diff --git a/xlators/features/changetimerecorder/Makefile.am b/xlators/features/changetimerecorder/Makefile.am |
|
new file mode 100644 |
|
index 0000000..a985f42 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/Makefile.am |
|
@@ -0,0 +1,3 @@ |
|
+SUBDIRS = src |
|
+ |
|
+CLEANFILES = |
|
diff --git a/xlators/features/changetimerecorder/src/Makefile.am b/xlators/features/changetimerecorder/src/Makefile.am |
|
new file mode 100644 |
|
index 0000000..620017e |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/Makefile.am |
|
@@ -0,0 +1,26 @@ |
|
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features |
|
+ |
|
+# changetimerecorder can only get build when libgfdb is enabled |
|
+if BUILD_GFDB |
|
+ xlator_LTLIBRARIES = changetimerecorder.la |
|
+endif |
|
+ |
|
+changetimerecorder_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) |
|
+ |
|
+changetimerecorder_la_SOURCES = changetimerecorder.c \ |
|
+ ctr-helper.c ctr-xlator-ctx.c |
|
+ |
|
+changetimerecorder_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ |
|
+ $(top_builddir)/libglusterfs/src/gfdb/libgfdb.la |
|
+ |
|
+noinst_HEADERS = ctr-messages.h changetimerecorder.h ctr_mem_types.h \ |
|
+ ctr-helper.h ctr-xlator-ctx.h |
|
+ |
|
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ |
|
+ -I$(top_srcdir)/libglusterfs/src/gfdb \ |
|
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ |
|
+ -DDATADIR=\"$(localstatedir)\" |
|
+ |
|
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(SQLITE_CFLAGS) |
|
+ |
|
+CLEANFILES = |
|
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.c b/xlators/features/changetimerecorder/src/changetimerecorder.c |
|
new file mode 100644 |
|
index 0000000..f2aa4a9 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.c |
|
@@ -0,0 +1,2371 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+#include <ctype.h> |
|
+#include <sys/uio.h> |
|
+ |
|
+#include "gfdb_sqlite3.h" |
|
+#include "ctr-helper.h" |
|
+#include "ctr-messages.h" |
|
+#include <glusterfs/syscall.h> |
|
+ |
|
+#include "changetimerecorder.h" |
|
+#include "tier-ctr-interface.h" |
|
+ |
|
+/*******************************inode forget***********************************/ |
|
+int |
|
+ctr_forget(xlator_t *this, inode_t *inode) |
|
+{ |
|
+ fini_ctr_xlator_ctx(this, inode); |
|
+ return 0; |
|
+} |
|
+ |
|
+/************************** Look up heal **************************************/ |
|
+/* |
|
+Problem: The CTR xlator records file meta (heat/hardlinks) |
|
+into the data. This works fine for files which are created |
|
+after ctr xlator is switched ON. But for files which were |
|
+created before CTR xlator is ON, CTR xlator is not able to |
|
+record either of the meta i.e heat or hardlinks. Thus making |
|
+those files immune to promotions/demotions. |
|
+ |
|
+Solution: The solution that is implemented in this patch is |
|
+do ctr-db heal of all those pre-existent files, using named lookup. |
|
+For this purpose we use the inode-xlator context variable option |
|
+in gluster. |
|
+The inode-xlator context variable for ctr xlator will have the |
|
+following, |
|
+ a. A Lock for the context variable |
|
+ b. A hardlink list: This list represents the successful looked |
|
+ up hardlinks. |
|
+These are the scenarios when the hardlink list is updated: |
|
+1) Named-Lookup: Whenever a named lookup happens on a file, in the |
|
+ wind path we copy all required hardlink and inode information to |
|
+ ctr_db_record structure, which resides in the frame->local variable. |
|
+ We don't update the database in wind. During the unwind, we read the |
|
+ information from the ctr_db_record and , |
|
+ Check if the inode context variable is created, if not we create it. |
|
+ Check if the hard link is there in the hardlink list. |
|
+ If its not there we add it to the list and send a update to the |
|
+ database using libgfdb. |
|
+ Please note: The database transaction can fail(and we ignore) as there |
|
+ already might be a record in the db. This update to the db is to heal |
|
+ if its not there. |
|
+ If its there in the list we ignore it. |
|
+2) Inode Forget: Whenever an inode forget hits we clear the hardlink list in |
|
+ the inode context variable and delete the inode context variable. |
|
+ Please note: An inode forget may happen for two reason, |
|
+ a. when the inode is deleted. |
|
+ b. the in-memory inode is evicted from the inode table due to cache limits. |
|
+3) create: whenever a create happens we create the inode context variable and |
|
+ add the hardlink. The database updation is done as usual by ctr. |
|
+4) link: whenever a hardlink is created for the inode, we create the inode |
|
+ context variable, if not present, and add the hardlink to the list. |
|
+5) unlink: whenever a unlink happens we delete the hardlink from the list. |
|
+6) mknod: same as create. |
|
+7) rename: whenever a rename happens we update the hardlink in list. if the |
|
+ hardlink was not present to be updated, we add the hardlink to the list. |
|
+ |
|
+What is pending: |
|
+1) This solution will only work for named lookups. |
|
+2) We don't track afr-self-heal/dht-rebalancer traffic for healing. |
|
+ |
|
+*/ |
|
+ |
|
+/* This function does not write anything to the db, |
|
+ * just created the local variable |
|
+ * for the frame and sets values for the ctr_db_record */ |
|
+static int |
|
+ctr_lookup_wind(call_frame_t *frame, xlator_t *this, |
|
+ gf_ctr_inode_context_t *ctr_inode_cx) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ GF_ASSERT(this); |
|
+ IS_CTR_INODE_CX_SANE(ctr_inode_cx); |
|
+ |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { |
|
+ frame->local = init_ctr_local_t(this); |
|
+ if (!frame->local) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, |
|
+ "WIND: Error while creating ctr local"); |
|
+ goto out; |
|
+ }; |
|
+ ctr_local = frame->local; |
|
+ /*Definitely no internal fops will reach here*/ |
|
+ ctr_local->is_internal_fop = _gf_false; |
|
+ /*Don't record counters*/ |
|
+ CTR_DB_REC(ctr_local).do_record_counters = _gf_false; |
|
+ /*Don't record time at all*/ |
|
+ CTR_DB_REC(ctr_local).do_record_times = _gf_false; |
|
+ |
|
+ /* Copy gfid into db record*/ |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); |
|
+ |
|
+ /* Set fop_path and fop_type, required by libgfdb to make |
|
+ * decision while inserting the record */ |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; |
|
+ |
|
+ /* Copy hard link info*/ |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid, |
|
+ *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); |
|
+ if (snprintf(CTR_DB_REC(ctr_local).file_name, |
|
+ sizeof(CTR_DB_REC(ctr_local).file_name), "%s", |
|
+ NEW_LINK_CX(ctr_inode_cx)->basename) >= |
|
+ sizeof(CTR_DB_REC(ctr_local).file_name)) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, |
|
+ "WIND: Error copying filename of ctr local"); |
|
+ goto out; |
|
+ } |
|
+ /* Since we are in lookup we can ignore errors while |
|
+ * Inserting in the DB, because there may be many |
|
+ * attempts to write to the DB for healing. |
|
+ * We don't want to log all failed attempts and |
|
+ * bloat the log*/ |
|
+ ctr_local->gfdb_db_record.ignore_errors = _gf_true; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ |
|
+ if (ret) { |
|
+ free_ctr_local(ctr_local); |
|
+ frame->local = NULL; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* This function inserts the ctr_db_record populated by ctr_lookup_wind |
|
+ * in to the db. It also destroys the frame->local created by ctr_lookup_wind */ |
|
+static int |
|
+ctr_lookup_unwind(call_frame_t *frame, xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ GF_ASSERT(_priv->_db_conn); |
|
+ |
|
+ ctr_local = frame->local; |
|
+ |
|
+ if (ctr_local && (ctr_local->ia_inode_type != IA_IFDIR)) { |
|
+ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record); |
|
+ if (ret == -1) { |
|
+ gf_msg(this->name, |
|
+ _gfdb_log_level(GF_LOG_ERROR, |
|
+ ctr_local->gfdb_db_record.ignore_errors), |
|
+ 0, CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, |
|
+ "UNWIND: Error filling ctr local"); |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ free_ctr_local(ctr_local); |
|
+ frame->local = NULL; |
|
+ return ret; |
|
+} |
|
+ |
|
+/****************************************************************************** |
|
+ * |
|
+ * FOPS HANDLING BELOW |
|
+ * |
|
+ * ***************************************************************************/ |
|
+ |
|
+/****************************LOOKUP********************************************/ |
|
+ |
|
+int32_t |
|
+ctr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, inode_t *inode, |
|
+ struct iatt *buf, dict_t *dict, struct iatt *postparent) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; |
|
+ gf_boolean_t _is_heal_needed = _gf_false; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /* if the lookup failed, don't do anything*/ |
|
+ if (op_ret == -1) { |
|
+ gf_msg_trace(this->name, 0, "lookup failed with %s", |
|
+ strerror(op_errno)); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Ignore directory lookups */ |
|
+ if (inode->ia_type == IA_IFDIR) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* if frame local was not set by the ctr_lookup() |
|
+ * so don't do anything*/ |
|
+ if (!frame->local) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* if the lookup is for a dht link file, do not record*/ |
|
+ if (dht_is_linkfile(buf, dict)) { |
|
+ gf_msg_trace(this->name, 0, |
|
+ "Ignoring Lookup " |
|
+ "for dht link file"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_local = frame->local; |
|
+ /*Assign the proper inode type*/ |
|
+ ctr_local->ia_inode_type = inode->ia_type; |
|
+ |
|
+ /* Copy gfid directly from inode */ |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); |
|
+ |
|
+ /* Checking if gfid and parent gfid is valid */ |
|
+ if (gf_uuid_is_null(CTR_DB_REC(ctr_local).gfid) || |
|
+ gf_uuid_is_null(CTR_DB_REC(ctr_local).pargfid)) { |
|
+ gf_msg_trace(this->name, 0, "Invalid GFID"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* if its a first entry |
|
+ * then mark the ctr_record for create |
|
+ * A create will attempt a file and a hard link created in the db*/ |
|
+ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode); |
|
+ if (!ctr_xlator_ctx) { |
|
+ /* This marks inode heal */ |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; |
|
+ _is_heal_needed = _gf_true; |
|
+ } |
|
+ |
|
+ /* Copy the correct gfid from resolved inode */ |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, inode->gfid); |
|
+ |
|
+ /* Add hard link to the list */ |
|
+ ret_val = add_hard_link_ctx(frame, this, inode); |
|
+ if (ret_val == CTR_CTX_ERROR) { |
|
+ gf_msg_trace(this->name, 0, "Failed adding hardlink to list"); |
|
+ goto out; |
|
+ } |
|
+ /* If inode needs healing then heal the hardlink also */ |
|
+ else if (ret_val & CTR_TRY_INODE_HEAL) { |
|
+ /* This marks inode heal */ |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_type = GFDB_FOP_CREATE_WRITE; |
|
+ _is_heal_needed = _gf_true; |
|
+ } |
|
+ /* If hardlink needs healing */ |
|
+ else if (ret_val & CTR_TRY_HARDLINK_HEAL) { |
|
+ _is_heal_needed = _gf_true; |
|
+ } |
|
+ |
|
+ /* If lookup heal needed */ |
|
+ if (!_is_heal_needed) |
|
+ goto out; |
|
+ |
|
+ /* FINALLY HEAL : Inserts the ctr_db_record populated by ctr_lookup_wind |
|
+ * in to the db. It also destroys the frame->local |
|
+ * created by ctr_lookup_wind */ |
|
+ ret = ctr_lookup_unwind(frame, this); |
|
+ if (ret) { |
|
+ gf_msg_trace(this->name, 0, "Failed healing/inserting link"); |
|
+ } |
|
+ |
|
+out: |
|
+ free_ctr_local((gf_ctr_local_t *)frame->local); |
|
+ frame->local = NULL; |
|
+ |
|
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict, |
|
+ postparent); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) |
|
+{ |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t ctr_link_cx; |
|
+ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ |
|
+ /* Don't handle nameless lookups*/ |
|
+ if (!loc->parent || !loc->name) |
|
+ goto out; |
|
+ |
|
+ /*fill ctr link context*/ |
|
+ FILL_CTR_LINK_CX(_link_cx, loc->parent->gfid, loc->name, out); |
|
+ |
|
+ /* Fill ctr inode context*/ |
|
+ /* IA_IFREG : We assume its a file in the wind |
|
+ * but in the unwind we are sure what the inode is a file |
|
+ * or directory |
|
+ * gfid: we are just filling loc->gfid which is not correct. |
|
+ * In unwind we fill the correct gfid for successful lookup*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, IA_IFREG, loc->gfid, _link_cx, NULL, |
|
+ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /* Create the frame->local and populate ctr_db_record |
|
+ * No writing to the db yet */ |
|
+ ret = ctr_lookup_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED, |
|
+ "Failed to insert link wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_lookup_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->lookup, loc, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************WRITEV********************************************/ |
|
+int32_t |
|
+ctr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, |
|
+ struct iatt *postbuf, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, |
|
+ "Failed to insert writev unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, |
|
+ xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, |
|
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_WRITEV_WIND_FAILED, |
|
+ "Failed to insert writev wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_writev_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags, |
|
+ iobref, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+/******************************setattr*****************************************/ |
|
+ |
|
+int32_t |
|
+ctr_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, |
|
+ struct iatt *postop_stbuf, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, |
|
+ "Failed to insert setattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop_stbuf, |
|
+ postop_stbuf, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, |
|
+ int32_t valid, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, |
|
+ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, |
|
+ "Failed to insert setattr wind"); |
|
+ } |
|
+out: |
|
+ |
|
+ STACK_WIND(frame, ctr_setattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+/*************************** fsetattr ***************************************/ |
|
+int32_t |
|
+ctr_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *preop_stbuf, |
|
+ struct iatt *postop_stbuf, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, |
|
+ "Failed to insert fsetattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, preop_stbuf, |
|
+ postop_stbuf, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, |
|
+ int32_t valid, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, |
|
+ "Failed to insert fsetattr wind"); |
|
+ } |
|
+out: |
|
+ STACK_WIND(frame, ctr_fsetattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+/****************************fremovexattr************************************/ |
|
+ |
|
+int32_t |
|
+ctr_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, |
|
+ "Failed to insert fremovexattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, |
|
+ const char *name, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, |
|
+ "Failed to insert fremovexattr wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_fremovexattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************removexattr*************************************/ |
|
+ |
|
+int32_t |
|
+ctr_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, |
|
+ "Failed to insert removexattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, |
|
+ const char *name, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, |
|
+ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, |
|
+ "Failed to insert removexattr wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_removexattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************truncate****************************************/ |
|
+ |
|
+int32_t |
|
+ctr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, |
|
+ struct iatt *postbuf, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, |
|
+ "Failed to insert truncate unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, |
|
+ xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, |
|
+ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, |
|
+ "Failed to insert truncate wind"); |
|
+ } |
|
+out: |
|
+ STACK_WIND(frame, ctr_truncate_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************ftruncate***************************************/ |
|
+ |
|
+int32_t |
|
+ctr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf, |
|
+ struct iatt *postbuf, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, |
|
+ "Failed to insert ftruncate unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, |
|
+ xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, |
|
+ "Failed to insert ftruncate wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_ftruncate_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************rename******************************************/ |
|
+int32_t |
|
+ctr_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *buf, |
|
+ struct iatt *preoldparent, struct iatt *postoldparent, |
|
+ struct iatt *prenewparent, struct iatt *postnewparent, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ uint32_t remaining_links = -1; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ gfdb_fop_type_t fop_type = GFDB_FOP_INVALID_OP; |
|
+ gfdb_fop_path_t fop_path = GFDB_FOP_INVALID; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, |
|
+ GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, |
|
+ "Failed to insert rename unwind"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (!xdata) |
|
+ goto out; |
|
+ /* |
|
+ * |
|
+ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator |
|
+ * This is only set when we are overwriting hardlinks. |
|
+ * |
|
+ * */ |
|
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, |
|
+ &remaining_links); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); |
|
+ remaining_links = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_local = frame->local; |
|
+ if (!ctr_local) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_NULL_LOCAL, |
|
+ "ctr_local is NULL."); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* This is not the only link */ |
|
+ if (remaining_links > 1) { |
|
+ fop_type = GFDB_FOP_DENTRY_WRITE; |
|
+ fop_path = GFDB_FOP_UNDEL; |
|
+ } |
|
+ /* Last link that was deleted */ |
|
+ else if (remaining_links == 1) { |
|
+ fop_type = GFDB_FOP_DENTRY_WRITE; |
|
+ fop_path = GFDB_FOP_UNDEL_ALL; |
|
+ } else { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_UNWIND_FAILED, |
|
+ "Invalid link count from posix"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = ctr_delete_hard_link_from_db( |
|
+ this, CTR_DB_REC(ctr_local).old_gfid, CTR_DB_REC(ctr_local).pargfid, |
|
+ CTR_DB_REC(ctr_local).file_name, fop_type, fop_path); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, |
|
+ "Failed to delete records of %s", |
|
+ CTR_DB_REC(ctr_local).old_file_name); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent, |
|
+ postoldparent, prenewparent, postnewparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t new_link_cx, old_link_cx; |
|
+ gf_ctr_link_context_t *_nlink_cx = &new_link_cx; |
|
+ gf_ctr_link_context_t *_olink_cx = &old_link_cx; |
|
+ int is_dict_created = 0; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill old link context*/ |
|
+ FILL_CTR_LINK_CX(_olink_cx, oldloc->pargfid, oldloc->name, out); |
|
+ |
|
+ /*Fill new link context*/ |
|
+ FILL_CTR_LINK_CX(_nlink_cx, newloc->pargfid, newloc->name, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, |
|
+ oldloc->inode->gfid, _nlink_cx, _olink_cx, |
|
+ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /* If the rename is a overwrite of hardlink |
|
+ * rename ("file1", "file2") |
|
+ * file1 is hardlink for gfid say 00000000-0000-0000-0000-00000000000A |
|
+ * file2 is hardlink for gfid say 00000000-0000-0000-0000-00000000000B |
|
+ * so we are saving file2 gfid in old_gfid so that we delete entries |
|
+ * from the db during rename callback if the fop is successful |
|
+ * */ |
|
+ if (newloc->inode) { |
|
+ /* This is the GFID from where the newloc hardlink will be |
|
+ * unlinked */ |
|
+ _inode_cx->old_gfid = &newloc->inode->gfid; |
|
+ } |
|
+ |
|
+ /* Is a metatdata fop */ |
|
+ _inode_cx->is_metadata_fop = _gf_true; |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RENAME_WIND_FAILED, |
|
+ "Failed to insert rename wind"); |
|
+ } else { |
|
+ /* We are doing updation of hard link in inode context in wind |
|
+ * As we don't get the "inode" in the call back for rename */ |
|
+ ret = update_hard_link_ctx(frame, this, oldloc->inode); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_UPDATE_HARDLINK_FAILED, |
|
+ "Failed " |
|
+ "updating hard link in ctr inode context"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* If the newloc has an inode. i.e acquiring hardlink of an |
|
+ * exisitng file i.e overwritting a file. |
|
+ * */ |
|
+ if (newloc->inode) { |
|
+ /* Getting the ctr inode context variable for |
|
+ * inode whose hardlink will be acquired during |
|
+ * the rename |
|
+ * */ |
|
+ ctr_xlator_ctx = get_ctr_xlator_ctx(this, newloc->inode); |
|
+ if (!ctr_xlator_ctx) { |
|
+ /* Since there is no ctr inode context |
|
+ * so nothing more to do */ |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Deleting hardlink from context variable */ |
|
+ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, newloc->pargfid, |
|
+ newloc->name); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_DELETE_HARDLINK_FAILED, |
|
+ "Failed to delete hard link"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Requesting for number of hardlinks on the newloc |
|
+ * inode from POSIX. |
|
+ * */ |
|
+ is_dict_created = set_posix_link_request(this, &xdata); |
|
+ if (is_dict_created == -1) { |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_rename_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); |
|
+ |
|
+ if (is_dict_created == 1) { |
|
+ dict_unref(xdata); |
|
+ } |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************unlink******************************************/ |
|
+int32_t |
|
+ctr_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent, |
|
+ struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ uint32_t remaining_links = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ if (!xdata) |
|
+ goto out; |
|
+ |
|
+ /* |
|
+ * |
|
+ * Extracting GF_RESPONSE_LINK_COUNT_XDATA from POSIX Xlator |
|
+ * |
|
+ * */ |
|
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, |
|
+ &remaining_links); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ "Failed to getting GF_RESPONSE_LINK_COUNT_XDATA"); |
|
+ remaining_links = -1; |
|
+ } |
|
+ |
|
+ /*This is not the only link*/ |
|
+ if (remaining_links != 1) { |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, |
|
+ GFDB_FOP_UNDEL); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, |
|
+ "Failed to insert unlink unwind"); |
|
+ } |
|
+ } |
|
+ /*Last link that was deleted*/ |
|
+ else if (remaining_links == 1) { |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, |
|
+ GFDB_FOP_UNDEL_ALL); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, |
|
+ "Failed to insert unlink unwind"); |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent, |
|
+ xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t ctr_link_cx; |
|
+ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; |
|
+ gf_boolean_t is_xdata_created = _gf_false; |
|
+ struct iatt dummy_stat = {0}; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill link context*/ |
|
+ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, |
|
+ _link_cx, NULL, GFDB_FOP_DENTRY_WRITE, |
|
+ GFDB_FOP_WDEL); |
|
+ |
|
+ /*Internal FOP*/ |
|
+ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); |
|
+ |
|
+ /* Is a metadata FOP */ |
|
+ _inode_cx->is_metadata_fop = _gf_true; |
|
+ |
|
+ /* If its a internal FOP and dht link file donot record*/ |
|
+ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, |
|
+ "Failed to insert unlink wind"); |
|
+ } else { |
|
+ /* We are doing delete of hard link in inode context in wind |
|
+ * As we don't get the "inode" in the call back for rename */ |
|
+ ret = delete_hard_link_ctx(frame, this, loc->inode); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, |
|
+ "Failed " |
|
+ "deleting hard link from ctr inode context"); |
|
+ } |
|
+ } |
|
+ |
|
+ /* |
|
+ * |
|
+ * Sending GF_REQUEST_LINK_COUNT_XDATA |
|
+ * to POSIX Xlator to send link count in unwind path |
|
+ * |
|
+ * */ |
|
+ /*create xdata if NULL*/ |
|
+ if (!xdata) { |
|
+ xdata = dict_new(); |
|
+ is_xdata_created = (xdata) ? _gf_true : _gf_false; |
|
+ } |
|
+ if (!xdata) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL, |
|
+ "xdata is NULL :Cannot send " |
|
+ "GF_REQUEST_LINK_COUNT_XDATA to posix"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); |
|
+ if (is_xdata_created) { |
|
+ dict_unref(xdata); |
|
+ } |
|
+ goto out; |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_unlink_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); |
|
+ |
|
+ if (is_xdata_created) |
|
+ dict_unref(xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************fsync******************************************/ |
|
+int32_t |
|
+ctr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, |
|
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, |
|
+ "Failed to insert fsync unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_WIND_FAILED, |
|
+ "Failed to insert fsync wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_fsync_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************setxattr****************************************/ |
|
+ |
|
+int |
|
+ctr_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, |
|
+ "Failed to insert setxattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+ctr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, |
|
+ int flags, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, loc->inode->gfid, |
|
+ NULL, NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, |
|
+ "Failed to insert setxattr wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_setxattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata); |
|
+ return 0; |
|
+} |
|
+/**************************** fsetxattr *************************************/ |
|
+int32_t |
|
+ctr_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, |
|
+ int32_t op_ret, int32_t op_errno, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_WRITE, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, |
|
+ "Failed to insert fsetxattr unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int32_t |
|
+ctr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, |
|
+ int32_t flags, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_SETATTR_WIND_FAILED, |
|
+ "Failed to insert fsetxattr wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_fsetxattr_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); |
|
+ return 0; |
|
+} |
|
+/****************************mknod*******************************************/ |
|
+ |
|
+int32_t |
|
+ctr_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, |
|
+ int32_t op_errno, inode_t *inode, struct iatt *buf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_heal_ret_val_t ret_val = CTR_CTX_ERROR; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ /* Add hard link to the list */ |
|
+ ret_val = add_hard_link_ctx(frame, this, inode); |
|
+ if (ret_val == CTR_CTX_ERROR) { |
|
+ gf_msg_trace(this->name, 0, "Failed adding hard link"); |
|
+ } |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, |
|
+ GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, |
|
+ "Failed to insert mknod unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, |
|
+ postparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+ctr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, |
|
+ dev_t rdev, mode_t umask, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t ctr_link_cx; |
|
+ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; |
|
+ uuid_t gfid = { |
|
+ 0, |
|
+ }; |
|
+ uuid_t *ptr_gfid = &gfid; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ |
|
+ /*get gfid from xdata dict*/ |
|
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); |
|
+ if (ret) { |
|
+ gf_msg_debug(this->name, 0, "failed to get gfid from dict"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*fill ctr link context*/ |
|
+ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, |
|
+ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_MKNOD_WIND_FAILED, |
|
+ "Failed to insert mknod wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_mknod_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************create******************************************/ |
|
+int |
|
+ctr_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = add_hard_link_ctx(frame, this, inode); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_ADD_HARDLINK_FAILED, |
|
+ "Failed adding hard link"); |
|
+ } |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_CREATE_WRITE, |
|
+ GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, |
|
+ "Failed to insert create unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf, |
|
+ preparent, postparent, xdata); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+ctr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, |
|
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t ctr_link_cx; |
|
+ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; |
|
+ uuid_t gfid = { |
|
+ 0, |
|
+ }; |
|
+ uuid_t *ptr_gfid = &gfid; |
|
+ struct iatt dummy_stat = {0}; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ |
|
+ /*Get GFID from Xdata dict*/ |
|
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_GET_GFID_FROM_DICT_FAILED, |
|
+ "failed to get gfid from dict"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*fill ctr link context*/ |
|
+ FILL_CTR_LINK_CX(_link_cx, loc->pargfid, loc->name, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, loc->inode->ia_type, *ptr_gfid, _link_cx, |
|
+ NULL, GFDB_FOP_CREATE_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*Internal FOP*/ |
|
+ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); |
|
+ |
|
+ /* If its a internal FOP and dht link file donot record*/ |
|
+ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, &ctr_inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_WIND_FAILED, |
|
+ "Failed to insert create wind"); |
|
+ } |
|
+out: |
|
+ STACK_WIND(frame, ctr_create_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, |
|
+ xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/****************************link********************************************/ |
|
+ |
|
+int |
|
+ctr_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, inode_t *inode, struct iatt *stbuf, |
|
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ /* Add hard link to the list */ |
|
+ ret = add_hard_link_ctx(frame, this, inode); |
|
+ if (ret) { |
|
+ gf_msg_trace(this->name, 0, "Failed adding hard link"); |
|
+ } |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_DENTRY_WRITE, |
|
+ GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, |
|
+ "Failed to insert create unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent, |
|
+ postparent, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+ctr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, |
|
+ dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ gf_ctr_link_context_t ctr_link_cx; |
|
+ gf_ctr_link_context_t *_link_cx = &ctr_link_cx; |
|
+ struct iatt dummy_stat = {0}; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ |
|
+ /*fill ctr link context*/ |
|
+ FILL_CTR_LINK_CX(_link_cx, newloc->pargfid, newloc->name, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, oldloc->inode->ia_type, |
|
+ oldloc->inode->gfid, _link_cx, NULL, |
|
+ GFDB_FOP_DENTRY_WRITE, GFDB_FOP_WIND); |
|
+ |
|
+ /*Internal FOP*/ |
|
+ _inode_cx->is_internal_fop = is_internal_fop(frame, xdata); |
|
+ |
|
+ /* Is a metadata fop */ |
|
+ _inode_cx->is_metadata_fop = _gf_true; |
|
+ |
|
+ /* If its a internal FOP and dht link file donot record*/ |
|
+ if (_inode_cx->is_internal_fop && dht_is_linkfile(&dummy_stat, xdata)) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_LINK_WIND_FAILED, |
|
+ "Failed to insert link wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_link_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/******************************readv*****************************************/ |
|
+int |
|
+ctr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, |
|
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf, |
|
+ struct iobref *iobref, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, out); |
|
+ |
|
+ ret = ctr_insert_unwind(frame, this, GFDB_FOP_INODE_READ, GFDB_FOP_UNWIND); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_CREATE_UNWIND_FAILED, |
|
+ "Failed to insert create unwind"); |
|
+ } |
|
+ |
|
+out: |
|
+ ctr_free_frame_local(frame); |
|
+ |
|
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf, |
|
+ iobref, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+ctr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, |
|
+ uint32_t flags, dict_t *xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_inode_context_t ctr_inode_cx; |
|
+ gf_ctr_inode_context_t *_inode_cx = &ctr_inode_cx; |
|
+ |
|
+ CTR_IS_DISABLED_THEN_GOTO(this, out); |
|
+ CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, out); |
|
+ |
|
+ /*Fill ctr inode context*/ |
|
+ FILL_CTR_INODE_CONTEXT(_inode_cx, fd->inode->ia_type, fd->inode->gfid, NULL, |
|
+ NULL, GFDB_FOP_INODE_READ, GFDB_FOP_WIND); |
|
+ |
|
+ /*record into the database*/ |
|
+ ret = ctr_insert_wind(frame, this, _inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_READV_WIND_FAILED, |
|
+ "Failed to insert readv wind"); |
|
+ } |
|
+ |
|
+out: |
|
+ STACK_WIND(frame, ctr_readv_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->readv, fd, size, off, flags, xdata); |
|
+ return 0; |
|
+} |
|
+ |
|
+/*******************************ctr_ipc****************************************/ |
|
+ |
|
+/*This is the call back function per record/file from data base*/ |
|
+static int |
|
+ctr_db_query_callback(gfdb_query_record_t *gfdb_query_record, void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_query_cbk_args_t *query_cbk_args = args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", query_cbk_args, out); |
|
+ |
|
+ ret = gfdb_write_query_record(query_cbk_args->query_fd, gfdb_query_record); |
|
+ if (ret) { |
|
+ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "Failed to write to query file"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ query_cbk_args->count++; |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/* This function does all the db queries related to tiering and |
|
+ * generates/populates new/existing query file |
|
+ * inputs: |
|
+ * xlator_t *this : CTR Translator |
|
+ * void *conn_node : Database connection |
|
+ * char *query_file: the query file that needs to be updated |
|
+ * gfdb_ipc_ctr_params_t *ipc_ctr_params: the query parameters |
|
+ * Return: |
|
+ * On success 0 |
|
+ * On failure -1 |
|
+ * */ |
|
+int |
|
+ctr_db_query(xlator_t *this, void *conn_node, char *query_file, |
|
+ gfdb_ipc_ctr_params_t *ipc_ctr_params) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_query_cbk_args_t query_cbk_args = {0}; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, conn_node, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, query_file, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, ipc_ctr_params, out); |
|
+ |
|
+ /*Query for eligible files from db*/ |
|
+ query_cbk_args.query_fd = open(query_file, O_WRONLY | O_CREAT | O_APPEND, |
|
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); |
|
+ if (query_cbk_args.query_fd < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, errno, CTR_MSG_FATAL_ERROR, |
|
+ "Failed to open query file %s", query_file); |
|
+ goto out; |
|
+ } |
|
+ if (!ipc_ctr_params->is_promote) { |
|
+ if (ipc_ctr_params->emergency_demote) { |
|
+ /* emergency demotion mode */ |
|
+ ret = find_all(conn_node, ctr_db_query_callback, |
|
+ (void *)&query_cbk_args, |
|
+ ipc_ctr_params->query_limit); |
|
+ } else { |
|
+ if (ipc_ctr_params->write_freq_threshold == 0 && |
|
+ ipc_ctr_params->read_freq_threshold == 0) { |
|
+ ret = find_unchanged_for_time(conn_node, ctr_db_query_callback, |
|
+ (void *)&query_cbk_args, |
|
+ &ipc_ctr_params->time_stamp); |
|
+ } else { |
|
+ ret = find_unchanged_for_time_freq( |
|
+ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, |
|
+ &ipc_ctr_params->time_stamp, |
|
+ ipc_ctr_params->write_freq_threshold, |
|
+ ipc_ctr_params->read_freq_threshold, _gf_false); |
|
+ } |
|
+ } |
|
+ } else { |
|
+ if (ipc_ctr_params->write_freq_threshold == 0 && |
|
+ ipc_ctr_params->read_freq_threshold == 0) { |
|
+ ret = find_recently_changed_files(conn_node, ctr_db_query_callback, |
|
+ (void *)&query_cbk_args, |
|
+ &ipc_ctr_params->time_stamp); |
|
+ } else { |
|
+ ret = find_recently_changed_files_freq( |
|
+ conn_node, ctr_db_query_callback, (void *)&query_cbk_args, |
|
+ &ipc_ctr_params->time_stamp, |
|
+ ipc_ctr_params->write_freq_threshold, |
|
+ ipc_ctr_params->read_freq_threshold, _gf_false); |
|
+ } |
|
+ } |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: query from db failed"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = clear_files_heat(conn_node); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: Failed to clear db entries"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ |
|
+ if (!ret) |
|
+ ret = query_cbk_args.count; |
|
+ |
|
+ if (query_cbk_args.query_fd >= 0) { |
|
+ sys_close(query_cbk_args.query_fd); |
|
+ query_cbk_args.query_fd = -1; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+void * |
|
+ctr_compact_thread(void *args) |
|
+{ |
|
+ int ret = -1; |
|
+ void *db_conn = NULL; |
|
+ |
|
+ xlator_t *this = NULL; |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ gf_boolean_t compact_active = _gf_false; |
|
+ gf_boolean_t compact_mode_switched = _gf_false; |
|
+ |
|
+ this = (xlator_t *)args; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", this, out); |
|
+ |
|
+ priv = this->private; |
|
+ |
|
+ db_conn = priv->_db_conn; |
|
+ compact_active = priv->compact_active; |
|
+ compact_mode_switched = priv->compact_mode_switched; |
|
+ |
|
+ gf_msg("ctr-compact", GF_LOG_INFO, 0, CTR_MSG_SET, "Starting compaction"); |
|
+ |
|
+ ret = compact_db(db_conn, compact_active, compact_mode_switched); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg("ctr-compact", GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to perform the compaction"); |
|
+ } |
|
+ |
|
+ ret = pthread_mutex_lock(&priv->compact_lock); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to acquire lock"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* We are done compaction on this brick. Set all flags to false */ |
|
+ priv->compact_active = _gf_false; |
|
+ priv->compact_mode_switched = _gf_false; |
|
+ |
|
+ ret = pthread_mutex_unlock(&priv->compact_lock); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to release lock"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+out: |
|
+ return NULL; |
|
+} |
|
+ |
|
+int |
|
+ctr_ipc_helper(xlator_t *this, dict_t *in_dict, dict_t *out_dict) |
|
+{ |
|
+ int ret = -1; |
|
+ char *ctr_ipc_ops = NULL; |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ char *db_version = NULL; |
|
+ char *db_param_key = NULL; |
|
+ char *db_param = NULL; |
|
+ char *query_file = NULL; |
|
+ gfdb_ipc_ctr_params_t *ipc_ctr_params = NULL; |
|
+ int result = 0; |
|
+ pthread_t compact_thread; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out); |
|
+ priv = this->private; |
|
+ GF_VALIDATE_OR_GOTO(this->name, priv->_db_conn, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, in_dict, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, out_dict, out); |
|
+ |
|
+ GET_DB_PARAM_FROM_DICT(this->name, in_dict, GFDB_IPC_CTR_KEY, ctr_ipc_ops, |
|
+ out); |
|
+ |
|
+ /*if its a db clear operation */ |
|
+ if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_CLEAR_OPS, |
|
+ SLEN(GFDB_IPC_CTR_CLEAR_OPS)) == 0) { |
|
+ ret = clear_files_heat(priv->_db_conn); |
|
+ if (ret) |
|
+ goto out; |
|
+ |
|
+ } /* if its a query operation, in which case its query + clear db*/ |
|
+ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_QUERY_OPS, |
|
+ SLEN(GFDB_IPC_CTR_QUERY_OPS)) == 0) { |
|
+ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_QFILE_PATH, &query_file); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed extracting query file path"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_get_bin(in_dict, GFDB_IPC_CTR_GET_QUERY_PARAMS, |
|
+ (void *)&ipc_ctr_params); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed extracting query parameters"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = ctr_db_query(this, priv->_db_conn, query_file, ipc_ctr_params); |
|
+ |
|
+ ret = dict_set_int32(out_dict, GFDB_IPC_CTR_RET_QUERY_COUNT, ret); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed setting query reply"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ } /* if its a query for db version */ |
|
+ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_VERSION_OPS, |
|
+ SLEN(GFDB_IPC_CTR_GET_DB_VERSION_OPS)) == 0) { |
|
+ ret = get_db_version(priv->_db_conn, &db_version); |
|
+ if (ret == -1 || !db_version) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed extracting db version "); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, out_dict, GFDB_IPC_CTR_RET_DB_VERSION, |
|
+ db_version, ret, error); |
|
+ |
|
+ } /* if its a query for a db setting */ |
|
+ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_GET_DB_PARAM_OPS, |
|
+ SLEN(GFDB_IPC_CTR_GET_DB_PARAM_OPS)) == 0) { |
|
+ ret = dict_get_str(in_dict, GFDB_IPC_CTR_GET_DB_KEY, &db_param_key); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed extracting db param key"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = get_db_params(priv->_db_conn, db_param_key, &db_param); |
|
+ if (ret == -1 || !db_param) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ SET_DB_PARAM_TO_DICT(this->name, out_dict, db_param_key, db_param, ret, |
|
+ error); |
|
+ } /* if its an attempt to compact the database */ |
|
+ else if (strncmp(ctr_ipc_ops, GFDB_IPC_CTR_SET_COMPACT_PRAGMA, |
|
+ SLEN(GFDB_IPC_CTR_SET_COMPACT_PRAGMA)) == 0) { |
|
+ ret = pthread_mutex_lock(&priv->compact_lock); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to acquire lock for compaction"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if ((priv->compact_active || priv->compact_mode_switched)) { |
|
+ /* Compaction in progress. LEAVE */ |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Compaction already in progress."); |
|
+ pthread_mutex_unlock(&priv->compact_lock); |
|
+ goto out; |
|
+ } |
|
+ /* At this point, we should be the only one on the brick */ |
|
+ /* compacting */ |
|
+ |
|
+ /* Grab the arguments from the dictionary */ |
|
+ ret = dict_get_int32(in_dict, "compact_active", &result); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to get compaction type"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (result) { |
|
+ priv->compact_active = _gf_true; |
|
+ } |
|
+ |
|
+ ret = dict_get_int32(in_dict, "compact_mode_switched", &result); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to see if compaction switched"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ if (result) { |
|
+ priv->compact_mode_switched = _gf_true; |
|
+ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, |
|
+ "Pre-thread: Compact mode switch is true"); |
|
+ } else { |
|
+ gf_msg("ctr-compact", GF_LOG_TRACE, 0, CTR_MSG_SET, |
|
+ "Pre-thread: Compact mode switch is false"); |
|
+ } |
|
+ |
|
+ ret = pthread_mutex_unlock(&priv->compact_lock); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to release lock for compaction"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = gf_thread_create(&compact_thread, NULL, ctr_compact_thread, |
|
+ (void *)this, "ctrcomp"); |
|
+ |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed to spawn compaction thread"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ goto out; |
|
+ } /* default case */ |
|
+ else { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ goto out; |
|
+error: |
|
+ GF_FREE(db_param_key); |
|
+ GF_FREE(db_param); |
|
+ GF_FREE(db_version); |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/* IPC Call from tier migrator to clear the heat on the DB */ |
|
+int32_t |
|
+ctr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *in_dict) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ dict_t *out_dict = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ priv = this->private; |
|
+ GF_ASSERT(priv); |
|
+ GF_ASSERT(priv->_db_conn); |
|
+ GF_VALIDATE_OR_GOTO(this->name, in_dict, wind); |
|
+ |
|
+ if (op != GF_IPC_TARGET_CTR) |
|
+ goto wind; |
|
+ |
|
+ out_dict = dict_new(); |
|
+ if (!out_dict) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = ctr_ipc_helper(this, in_dict, out_dict); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_SET, |
|
+ "Failed in ctr_ipc_helper"); |
|
+ } |
|
+out: |
|
+ |
|
+ STACK_UNWIND_STRICT(ipc, frame, ret, 0, out_dict); |
|
+ |
|
+ if (out_dict) |
|
+ dict_unref(out_dict); |
|
+ |
|
+ return 0; |
|
+ |
|
+wind: |
|
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), |
|
+ FIRST_CHILD(this)->fops->ipc, op, in_dict); |
|
+ |
|
+ return 0; |
|
+} |
|
+ |
|
+/* Call to initialize db for ctr xlator while ctr is enabled */ |
|
+int32_t |
|
+initialize_ctr_resource(xlator_t *this, gf_ctr_private_t *priv) |
|
+{ |
|
+ int ret_db = -1; |
|
+ dict_t *params_dict = NULL; |
|
+ |
|
+ if (!priv) |
|
+ goto error; |
|
+ |
|
+ /* For compaction */ |
|
+ priv->compact_active = _gf_false; |
|
+ priv->compact_mode_switched = _gf_false; |
|
+ ret_db = pthread_mutex_init(&priv->compact_lock, NULL); |
|
+ |
|
+ if (ret_db) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: Failed initializing compaction mutex"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ params_dict = dict_new(); |
|
+ if (!params_dict) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INIT_DB_PARAMS_FAILED, |
|
+ "DB Params cannot initialized!"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ /*Extract db params options*/ |
|
+ ret_db = extract_db_params(this, params_dict, priv->gfdb_db_type); |
|
+ if (ret_db) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, |
|
+ "Failed extracting db params options"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ /*Create a memory pool for ctr xlator*/ |
|
+ this->local_pool = mem_pool_new(gf_ctr_local_t, 64); |
|
+ if (!this->local_pool) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, |
|
+ "failed to create local memory pool"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ /*Initialize Database Connection*/ |
|
+ priv->_db_conn = init_db(params_dict, priv->gfdb_db_type); |
|
+ if (!priv->_db_conn) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: Failed initializing data base"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ ret_db = 0; |
|
+ goto out; |
|
+ |
|
+error: |
|
+ if (this) |
|
+ mem_pool_destroy(this->local_pool); |
|
+ |
|
+ if (priv) { |
|
+ GF_FREE(priv->ctr_db_path); |
|
+ } |
|
+ GF_FREE(priv); |
|
+ ret_db = -1; |
|
+out: |
|
+ if (params_dict) |
|
+ dict_unref(params_dict); |
|
+ |
|
+ return ret_db; |
|
+} |
|
+ |
|
+/******************************************************************************/ |
|
+int |
|
+reconfigure(xlator_t *this, dict_t *options) |
|
+{ |
|
+ char *temp_str = NULL; |
|
+ int ret = 0; |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ |
|
+ priv = this->private; |
|
+ |
|
+ if (dict_get_str(options, "changetimerecorder.frequency", &temp_str)) { |
|
+ gf_msg(this->name, GF_LOG_TRACE, 0, CTR_MSG_SET, "set"); |
|
+ } |
|
+ |
|
+ GF_OPTION_RECONF("ctr-enabled", priv->enabled, options, bool, out); |
|
+ if (!priv->enabled) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, |
|
+ "CTR Xlator is not enabled so skip ctr reconfigure"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* If ctr is enabled after skip init for ctr xlator then call |
|
+ initialize_ctr_resource during reconfigure phase to allocate resources |
|
+ for xlator |
|
+ */ |
|
+ if (priv->enabled && !priv->_db_conn) { |
|
+ ret = initialize_ctr_resource(this, priv); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: Failed ctr initialize resource"); |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ |
|
+ GF_OPTION_RECONF("record-counters", priv->ctr_record_counter, options, bool, |
|
+ out); |
|
+ |
|
+ GF_OPTION_RECONF("ctr-record-metadata-heat", priv->ctr_record_metadata_heat, |
|
+ options, bool, out); |
|
+ |
|
+ GF_OPTION_RECONF("ctr_link_consistency", priv->ctr_link_consistency, |
|
+ options, bool, out); |
|
+ |
|
+ GF_OPTION_RECONF("ctr_lookupheal_inode_timeout", |
|
+ priv->ctr_lookupheal_inode_timeout, options, uint64, out); |
|
+ |
|
+ GF_OPTION_RECONF("ctr_lookupheal_link_timeout", |
|
+ priv->ctr_lookupheal_link_timeout, options, uint64, out); |
|
+ |
|
+ GF_OPTION_RECONF("record-exit", priv->ctr_record_unwind, options, bool, |
|
+ out); |
|
+ |
|
+ GF_OPTION_RECONF("record-entry", priv->ctr_record_wind, options, bool, out); |
|
+ |
|
+ /* If database is sqlite */ |
|
+ if (priv->gfdb_db_type == GFDB_SQLITE3) { |
|
+ /* AUTOCHECKPOINT */ |
|
+ if (dict_get_str(options, GFDB_SQL_PARAM_WAL_AUTOCHECK, &temp_str) == |
|
+ 0) { |
|
+ ret = set_db_params(priv->_db_conn, "wal_autocheckpoint", temp_str); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, |
|
+ "Failed to set %s", GFDB_SQL_PARAM_WAL_AUTOCHECK); |
|
+ } |
|
+ } |
|
+ |
|
+ /* CACHE_SIZE */ |
|
+ if (dict_get_str(options, GFDB_SQL_PARAM_CACHE_SIZE, &temp_str) == 0) { |
|
+ ret = set_db_params(priv->_db_conn, "cache_size", temp_str); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, |
|
+ "Failed to set %s", GFDB_SQL_PARAM_CACHE_SIZE); |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/****************************init********************************************/ |
|
+ |
|
+int32_t |
|
+init(xlator_t *this) |
|
+{ |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ int ret_db = -1; |
|
+ |
|
+ if (!this) { |
|
+ gf_msg("ctr", GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: ctr this is not initialized"); |
|
+ return -1; |
|
+ } |
|
+ |
|
+ if (!this->children || this->children->next) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: ctr should have exactly one child"); |
|
+ return -1; |
|
+ } |
|
+ |
|
+ if (!this->parents) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DANGLING_VOLUME, |
|
+ "dangling volume. check volfile "); |
|
+ } |
|
+ |
|
+ priv = GF_CALLOC(1, sizeof(*priv), gf_ctr_mt_private_t); |
|
+ if (!priv) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED, |
|
+ "Calloc did not work!!!"); |
|
+ return -1; |
|
+ } |
|
+ |
|
+ /*Default values for the translator*/ |
|
+ priv->ctr_record_wind = _gf_true; |
|
+ priv->ctr_record_unwind = _gf_false; |
|
+ priv->ctr_hot_brick = _gf_false; |
|
+ priv->gfdb_db_type = GFDB_SQLITE3; |
|
+ priv->gfdb_sync_type = GFDB_DB_SYNC; |
|
+ priv->_db_conn = NULL; |
|
+ priv->ctr_lookupheal_link_timeout = CTR_DEFAULT_HARDLINK_EXP_PERIOD; |
|
+ priv->ctr_lookupheal_inode_timeout = CTR_DEFAULT_INODE_EXP_PERIOD; |
|
+ |
|
+ /*Extract ctr xlator options*/ |
|
+ ret_db = extract_ctr_options(this, priv); |
|
+ if (ret_db) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, |
|
+ "Failed extracting ctr xlator options"); |
|
+ GF_FREE(priv); |
|
+ return -1; |
|
+ } |
|
+ |
|
+ if (!priv->enabled) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, |
|
+ "CTR Xlator is not enabled so skip ctr init"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret_db = initialize_ctr_resource(this, priv); |
|
+ if (ret_db) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_FATAL_ERROR, |
|
+ "FATAL: Failed ctr initialize resource"); |
|
+ return -1; |
|
+ } |
|
+ |
|
+out: |
|
+ this->private = (void *)priv; |
|
+ return 0; |
|
+} |
|
+ |
|
+int |
|
+notify(xlator_t *this, int event, void *data, ...) |
|
+{ |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ int ret = 0; |
|
+ |
|
+ priv = this->private; |
|
+ |
|
+ if (!priv) |
|
+ goto out; |
|
+ |
|
+ ret = default_notify(this, event, data); |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+int32_t |
|
+mem_acct_init(xlator_t *this) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", this, out); |
|
+ |
|
+ ret = xlator_mem_acct_init(this, gf_ctr_mt_end + 1); |
|
+ |
|
+ if (ret != 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_MEM_ACC_INIT_FAILED, |
|
+ "Memory accounting init" |
|
+ "failed"); |
|
+ return ret; |
|
+ } |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+void |
|
+fini(xlator_t *this) |
|
+{ |
|
+ gf_ctr_private_t *priv = NULL; |
|
+ |
|
+ priv = this->private; |
|
+ |
|
+ if (priv && priv->enabled) { |
|
+ if (fini_db(priv->_db_conn)) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, |
|
+ "Failed closing " |
|
+ "db connection"); |
|
+ } |
|
+ |
|
+ if (priv->_db_conn) |
|
+ priv->_db_conn = NULL; |
|
+ |
|
+ GF_FREE(priv->ctr_db_path); |
|
+ if (pthread_mutex_destroy(&priv->compact_lock)) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_CLOSE_DB_CONN_FAILED, |
|
+ "Failed to " |
|
+ "destroy the compaction mutex"); |
|
+ } |
|
+ } |
|
+ GF_FREE(priv); |
|
+ mem_pool_destroy(this->local_pool); |
|
+ this->local_pool = NULL; |
|
+ |
|
+ return; |
|
+} |
|
+ |
|
+struct xlator_fops fops = { |
|
+ /*lookup*/ |
|
+ .lookup = ctr_lookup, |
|
+ /*write fops */ |
|
+ .mknod = ctr_mknod, |
|
+ .create = ctr_create, |
|
+ .truncate = ctr_truncate, |
|
+ .ftruncate = ctr_ftruncate, |
|
+ .setxattr = ctr_setxattr, |
|
+ .fsetxattr = ctr_fsetxattr, |
|
+ .removexattr = ctr_removexattr, |
|
+ .fremovexattr = ctr_fremovexattr, |
|
+ .unlink = ctr_unlink, |
|
+ .link = ctr_link, |
|
+ .rename = ctr_rename, |
|
+ .writev = ctr_writev, |
|
+ .setattr = ctr_setattr, |
|
+ .fsetattr = ctr_fsetattr, |
|
+ /*read fops*/ |
|
+ .readv = ctr_readv, |
|
+ /* IPC call*/ |
|
+ .ipc = ctr_ipc}; |
|
+ |
|
+struct xlator_cbks cbks = {.forget = ctr_forget}; |
|
+ |
|
+struct volume_options options[] = { |
|
+ {.key = |
|
+ { |
|
+ "ctr-enabled", |
|
+ }, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off", |
|
+ .description = "Enables the CTR", |
|
+ .flags = OPT_FLAG_SETTABLE}, |
|
+ {.key = {"record-entry"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "on"}, |
|
+ {.key = {"record-exit"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off"}, |
|
+ {.key = {"record-counters"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off", |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .tags = {}}, |
|
+ {.key = {"ctr-record-metadata-heat"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {"ctr_link_consistency"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {"ctr_lookupheal_link_timeout"}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "300", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_2}, |
|
+ .tags = {}}, |
|
+ {.key = {"ctr_lookupheal_inode_timeout"}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "300", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_2}, |
|
+ .tags = {}}, |
|
+ {.key = {"hot-brick"}, |
|
+ .type = GF_OPTION_TYPE_BOOL, |
|
+ .value = {"on", "off"}, |
|
+ .default_value = "off"}, |
|
+ {.key = {"db-type"}, |
|
+ .type = GF_OPTION_TYPE_STR, |
|
+ .value = {"hashfile", "rocksdb", "changelog", "sqlite3", "hyperdex"}, |
|
+ .default_value = "sqlite3", |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .tags = {}}, |
|
+ {.key = {"db-sync"}, |
|
+ .type = GF_OPTION_TYPE_STR, |
|
+ .value = {"sync", "async"}, |
|
+ .default_value = "sync"}, |
|
+ {.key = {"db-path"}, .type = GF_OPTION_TYPE_PATH}, |
|
+ {.key = {"db-name"}, .type = GF_OPTION_TYPE_STR}, |
|
+ {.key = {GFDB_SQL_PARAM_SYNC}, |
|
+ .type = GF_OPTION_TYPE_STR, |
|
+ .value = {"off", "normal", "full"}, |
|
+ .default_value = "normal"}, |
|
+ {.key = {GFDB_SQL_PARAM_JOURNAL_MODE}, |
|
+ .type = GF_OPTION_TYPE_STR, |
|
+ .value = {"delete", "truncate", "persist", "memory", "wal", "off"}, |
|
+ .default_value = "wal", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {GFDB_SQL_PARAM_AUTO_VACUUM}, |
|
+ .type = GF_OPTION_TYPE_STR, |
|
+ .value = {"off", "full", "incr"}, |
|
+ .default_value = "off", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {GFDB_SQL_PARAM_WAL_AUTOCHECK}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "25000", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {GFDB_SQL_PARAM_CACHE_SIZE}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "12500", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {GFDB_SQL_PARAM_PAGE_SIZE}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "4096", |
|
+ .flags = OPT_FLAG_SETTABLE, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, |
|
+ .tags = {}}, |
|
+ {.key = {NULL}}, |
|
+}; |
|
+ |
|
+xlator_api_t xlator_api = { |
|
+ .init = init, |
|
+ .fini = fini, |
|
+ .notify = notify, |
|
+ .reconfigure = reconfigure, |
|
+ .mem_acct_init = mem_acct_init, |
|
+ .op_version = {GD_OP_VERSION_3_7_0}, /* Present from the initial version */ |
|
+ .fops = &fops, |
|
+ .cbks = &cbks, |
|
+ .identifier = "changetimerecorder", |
|
+ .category = GF_MAINTAINED, |
|
+ .options = options, |
|
+}; |
|
diff --git a/xlators/features/changetimerecorder/src/changetimerecorder.h b/xlators/features/changetimerecorder/src/changetimerecorder.h |
|
new file mode 100644 |
|
index 0000000..0150a1c |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/changetimerecorder.h |
|
@@ -0,0 +1,21 @@ |
|
+/* |
|
+ Copyright (c) 2006-2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef __CTR_H |
|
+#define __CTR_H |
|
+ |
|
+#include <glusterfs/glusterfs.h> |
|
+#include <glusterfs/xlator.h> |
|
+#include <glusterfs/logging.h> |
|
+#include <glusterfs/common-utils.h> |
|
+#include "ctr_mem_types.h" |
|
+#include "ctr-helper.h" |
|
+ |
|
+#endif /* __CTR_H */ |
|
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.c b/xlators/features/changetimerecorder/src/ctr-helper.c |
|
new file mode 100644 |
|
index 0000000..e1e6573 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr-helper.c |
|
@@ -0,0 +1,293 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#include "gfdb_sqlite3.h" |
|
+#include "ctr-helper.h" |
|
+#include "ctr-messages.h" |
|
+ |
|
+/******************************************************************************* |
|
+ * |
|
+ * Fill unwind into db record |
|
+ * |
|
+ ******************************************************************************/ |
|
+int |
|
+fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local, |
|
+ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path) |
|
+{ |
|
+ int ret = -1; |
|
+ gfdb_time_t *ctr_uwtime = NULL; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ GF_ASSERT(ctr_local); |
|
+ |
|
+ /*If not unwind path error*/ |
|
+ if (!isunwindpath(fop_path)) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, |
|
+ "Wrong fop_path. Should be unwind"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_uwtime = &CTR_DB_REC(ctr_local).gfdb_unwind_change_time; |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_path = fop_path; |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_type = fop_type; |
|
+ |
|
+ ret = gettimeofday(ctr_uwtime, NULL); |
|
+ if (ret == -1) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, errno, |
|
+ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, |
|
+ "Error " |
|
+ "filling unwind time record %s", |
|
+ strerror(errno)); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Special case i.e if its a tier rebalance |
|
+ * + cold tier brick |
|
+ * + its a create/mknod FOP |
|
+ * we record unwind time as zero */ |
|
+ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && |
|
+ (!_priv->ctr_hot_brick) && isdentrycreatefop(fop_type)) { |
|
+ memset(ctr_uwtime, 0, sizeof(*ctr_uwtime)); |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/******************************************************************************* |
|
+ * |
|
+ * Fill wind into db record |
|
+ * |
|
+ ******************************************************************************/ |
|
+int |
|
+fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local, |
|
+ gf_ctr_inode_context_t *ctr_inode_cx) |
|
+{ |
|
+ int ret = -1; |
|
+ gfdb_time_t *ctr_wtime = NULL; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ GF_ASSERT(ctr_local); |
|
+ IS_CTR_INODE_CX_SANE(ctr_inode_cx); |
|
+ |
|
+ /*if not wind path error!*/ |
|
+ if (!iswindpath(ctr_inode_cx->fop_path)) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_WRONG_FOP_PATH, |
|
+ "Wrong fop_path. Should be wind"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_wtime = &CTR_DB_REC(ctr_local).gfdb_wind_change_time; |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_path = ctr_inode_cx->fop_path; |
|
+ CTR_DB_REC(ctr_local).gfdb_fop_type = ctr_inode_cx->fop_type; |
|
+ CTR_DB_REC(ctr_local).link_consistency = _priv->ctr_link_consistency; |
|
+ |
|
+ ret = gettimeofday(ctr_wtime, NULL); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, errno, |
|
+ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, |
|
+ "Error filling wind time record %s", strerror(errno)); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* Special case i.e if its a tier rebalance |
|
+ * + cold tier brick |
|
+ * + its a create/mknod FOP |
|
+ * we record wind time as zero */ |
|
+ if (ctr_local->client_pid == GF_CLIENT_PID_TIER_DEFRAG && |
|
+ (!_priv->ctr_hot_brick) && isdentrycreatefop(ctr_inode_cx->fop_type)) { |
|
+ memset(ctr_wtime, 0, sizeof(*ctr_wtime)); |
|
+ } |
|
+ |
|
+ /* Copy gfid into db record */ |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).gfid, *(ctr_inode_cx->gfid)); |
|
+ |
|
+ /* Copy older gfid if any */ |
|
+ if (ctr_inode_cx->old_gfid && |
|
+ (!gf_uuid_is_null(*(ctr_inode_cx->old_gfid)))) { |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).old_gfid, *(ctr_inode_cx->old_gfid)); |
|
+ } |
|
+ |
|
+ /*Hard Links*/ |
|
+ if (isdentryfop(ctr_inode_cx->fop_type)) { |
|
+ /*new link fop*/ |
|
+ if (NEW_LINK_CX(ctr_inode_cx)) { |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).pargfid, |
|
+ *((NEW_LINK_CX(ctr_inode_cx))->pargfid)); |
|
+ strcpy(CTR_DB_REC(ctr_local).file_name, |
|
+ NEW_LINK_CX(ctr_inode_cx)->basename); |
|
+ } |
|
+ /*rename fop*/ |
|
+ if (OLD_LINK_CX(ctr_inode_cx)) { |
|
+ gf_uuid_copy(CTR_DB_REC(ctr_local).old_pargfid, |
|
+ *((OLD_LINK_CX(ctr_inode_cx))->pargfid)); |
|
+ strcpy(CTR_DB_REC(ctr_local).old_file_name, |
|
+ OLD_LINK_CX(ctr_inode_cx)->basename); |
|
+ } |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ /*On error roll back and clean the record*/ |
|
+ if (ret == -1) { |
|
+ CLEAR_CTR_DB_RECORD(ctr_local); |
|
+ } |
|
+ return ret; |
|
+} |
|
+ |
|
+/****************************************************************************** |
|
+ * |
|
+ * CTR xlator init related functions |
|
+ * |
|
+ * |
|
+ * ****************************************************************************/ |
|
+static int |
|
+extract_sql_params(xlator_t *this, dict_t *params_dict) |
|
+{ |
|
+ int ret = -1; |
|
+ char *db_path = NULL; |
|
+ char *db_name = NULL; |
|
+ char *db_full_path = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(params_dict); |
|
+ |
|
+ /*Extract the path of the db*/ |
|
+ db_path = NULL; |
|
+ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-path", |
|
+ db_path, "/var/run/gluster/"); |
|
+ |
|
+ /*Extract the name of the db*/ |
|
+ db_name = NULL; |
|
+ GET_DB_PARAM_FROM_DICT_DEFAULT(this->name, this->options, "db-name", |
|
+ db_name, "gf_ctr_db.db"); |
|
+ |
|
+ /*Construct full path of the db*/ |
|
+ ret = gf_asprintf(&db_full_path, "%s/%s", db_path, db_name); |
|
+ if (ret < 0) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, |
|
+ "Construction of full db path failed!"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Setting the SQL DB Path*/ |
|
+ SET_DB_PARAM_TO_DICT(this->name, params_dict, GFDB_SQL_PARAM_DBPATH, |
|
+ db_full_path, ret, out); |
|
+ |
|
+ /*Extract rest of the sql params*/ |
|
+ ret = gfdb_set_sql_params(this->name, this->options, params_dict); |
|
+ if (ret) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, |
|
+ "Failed setting values to sql param dict!"); |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ if (ret) |
|
+ GF_FREE(db_full_path); |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type) |
|
+{ |
|
+ int ret = -1; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(params_dict); |
|
+ |
|
+ switch (db_type) { |
|
+ case GFDB_SQLITE3: |
|
+ ret = extract_sql_params(this, params_dict); |
|
+ if (ret) |
|
+ goto out; |
|
+ break; |
|
+ case GFDB_ROCKS_DB: |
|
+ case GFDB_HYPERDEX: |
|
+ case GFDB_HASH_FILE_STORE: |
|
+ case GFDB_INVALID_DB: |
|
+ case GFDB_DB_END: |
|
+ goto out; |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv) |
|
+{ |
|
+ int ret = -1; |
|
+ char *_val_str = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ /*Checking if the CTR Translator is enabled. By default its disabled*/ |
|
+ _priv->enabled = _gf_false; |
|
+ GF_OPTION_INIT("ctr-enabled", _priv->enabled, bool, out); |
|
+ if (!_priv->enabled) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_INFO, 0, CTR_MSG_XLATOR_DISABLED, |
|
+ "CTR Xlator is disabled."); |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Extract db type*/ |
|
+ GF_OPTION_INIT("db-type", _val_str, str, out); |
|
+ _priv->gfdb_db_type = gf_string2gfdbdbtype(_val_str); |
|
+ |
|
+ /*Extract flag for record on wind*/ |
|
+ GF_OPTION_INIT("record-entry", _priv->ctr_record_wind, bool, out); |
|
+ |
|
+ /*Extract flag for record on unwind*/ |
|
+ GF_OPTION_INIT("record-exit", _priv->ctr_record_unwind, bool, out); |
|
+ |
|
+ /*Extract flag for record on counters*/ |
|
+ GF_OPTION_INIT("record-counters", _priv->ctr_record_counter, bool, out); |
|
+ |
|
+ /* Extract flag for record metadata heat */ |
|
+ GF_OPTION_INIT("ctr-record-metadata-heat", _priv->ctr_record_metadata_heat, |
|
+ bool, out); |
|
+ |
|
+ /*Extract flag for link consistency*/ |
|
+ GF_OPTION_INIT("ctr_link_consistency", _priv->ctr_link_consistency, bool, |
|
+ out); |
|
+ |
|
+ /*Extract ctr_lookupheal_inode_timeout */ |
|
+ GF_OPTION_INIT("ctr_lookupheal_inode_timeout", |
|
+ _priv->ctr_lookupheal_inode_timeout, uint64, out); |
|
+ |
|
+ /*Extract ctr_lookupheal_link_timeout*/ |
|
+ GF_OPTION_INIT("ctr_lookupheal_link_timeout", |
|
+ _priv->ctr_lookupheal_link_timeout, uint64, out); |
|
+ |
|
+ /*Extract flag for hot tier brick*/ |
|
+ GF_OPTION_INIT("hot-brick", _priv->ctr_hot_brick, bool, out); |
|
+ |
|
+ /*Extract flag for sync mode*/ |
|
+ GF_OPTION_INIT("db-sync", _val_str, str, out); |
|
+ _priv->gfdb_sync_type = gf_string2gfdbdbsync(_val_str); |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
diff --git a/xlators/features/changetimerecorder/src/ctr-helper.h b/xlators/features/changetimerecorder/src/ctr-helper.h |
|
new file mode 100644 |
|
index 0000000..517fbb0 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr-helper.h |
|
@@ -0,0 +1,854 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef __CTR_HELPER_H |
|
+#define __CTR_HELPER_H |
|
+ |
|
+#include <glusterfs/xlator.h> |
|
+#include "ctr_mem_types.h" |
|
+#include <glusterfs/iatt.h> |
|
+#include <glusterfs/glusterfs.h> |
|
+#include <glusterfs/xlator.h> |
|
+#include <glusterfs/defaults.h> |
|
+#include <glusterfs/logging.h> |
|
+#include <glusterfs/common-utils.h> |
|
+#include <time.h> |
|
+#include <sys/time.h> |
|
+#include <pthread.h> |
|
+ |
|
+#include "gfdb_data_store.h" |
|
+#include "ctr-xlator-ctx.h" |
|
+#include "ctr-messages.h" |
|
+ |
|
+#define CTR_DEFAULT_HARDLINK_EXP_PERIOD 300 /* Five mins */ |
|
+#define CTR_DEFAULT_INODE_EXP_PERIOD 300 /* Five mins */ |
|
+ |
|
+typedef struct ctr_query_cbk_args { |
|
+ int query_fd; |
|
+ int count; |
|
+} ctr_query_cbk_args_t; |
|
+ |
|
+/*CTR Xlator Private structure*/ |
|
+typedef struct gf_ctr_private { |
|
+ gf_boolean_t enabled; |
|
+ char *ctr_db_path; |
|
+ gf_boolean_t ctr_hot_brick; |
|
+ gf_boolean_t ctr_record_wind; |
|
+ gf_boolean_t ctr_record_unwind; |
|
+ gf_boolean_t ctr_record_counter; |
|
+ gf_boolean_t ctr_record_metadata_heat; |
|
+ gf_boolean_t ctr_link_consistency; |
|
+ gfdb_db_type_t gfdb_db_type; |
|
+ gfdb_sync_type_t gfdb_sync_type; |
|
+ gfdb_conn_node_t *_db_conn; |
|
+ uint64_t ctr_lookupheal_link_timeout; |
|
+ uint64_t ctr_lookupheal_inode_timeout; |
|
+ gf_boolean_t compact_active; |
|
+ gf_boolean_t compact_mode_switched; |
|
+ pthread_mutex_t compact_lock; |
|
+} gf_ctr_private_t; |
|
+ |
|
+/* |
|
+ * gf_ctr_local_t is the ctr xlator local data structure that is stored in |
|
+ * the call_frame of each FOP. |
|
+ * |
|
+ * gfdb_db_record: The gf_ctr_local contains a gfdb_db_record object, which is |
|
+ * used by the insert_record() api from the libgfdb. The gfdb_db_record object |
|
+ * will contain all the inode and hardlink(only for dentry fops: create, |
|
+ * mknod,link, unlink, rename).The ctr_local is keep alive till the unwind |
|
+ * call and will be release during the unwind. The same gfdb_db_record will |
|
+ * used for the unwind insert_record() api, to record unwind in the database. |
|
+ * |
|
+ * ia_inode_type in gf_ctr_local will tell the type of the inode. This is |
|
+ * important for during the unwind path. As we will not have the inode during |
|
+ * the unwind path. We would have include this in the gfdb_db_record itself |
|
+ * but currently we record only file inode information. |
|
+ * |
|
+ * is_internal_fop in gf_ctr_local will tell us if this is a internal fop and |
|
+ * take special/no action. We don't record change/access times or increement |
|
+ * heat counter for internal fops from rebalancer. |
|
+ * */ |
|
+typedef struct gf_ctr_local { |
|
+ gfdb_db_record_t gfdb_db_record; |
|
+ ia_type_t ia_inode_type; |
|
+ gf_boolean_t is_internal_fop; |
|
+ gf_special_pid_t client_pid; |
|
+} gf_ctr_local_t; |
|
+/* |
|
+ * Easy access of gfdb_db_record of ctr_local |
|
+ * */ |
|
+#define CTR_DB_REC(ctr_local) (ctr_local->gfdb_db_record) |
|
+ |
|
+/*Clear db record*/ |
|
+#define CLEAR_CTR_DB_RECORD(ctr_local) \ |
|
+ do { \ |
|
+ ctr_local->gfdb_db_record.gfdb_fop_path = GFDB_FOP_INVALID; \ |
|
+ memset(&(ctr_local->gfdb_db_record.gfdb_wind_change_time), 0, \ |
|
+ sizeof(gfdb_time_t)); \ |
|
+ memset(&(ctr_local->gfdb_db_record.gfdb_unwind_change_time), 0, \ |
|
+ sizeof(gfdb_time_t)); \ |
|
+ gf_uuid_clear(ctr_local->gfdb_db_record.gfid); \ |
|
+ gf_uuid_clear(ctr_local->gfdb_db_record.pargfid); \ |
|
+ memset(ctr_local->gfdb_db_record.file_name, 0, GF_NAME_MAX + 1); \ |
|
+ memset(ctr_local->gfdb_db_record.old_file_name, 0, GF_NAME_MAX + 1); \ |
|
+ ctr_local->gfdb_db_record.gfdb_fop_type = GFDB_FOP_INVALID_OP; \ |
|
+ ctr_local->ia_inode_type = IA_INVAL; \ |
|
+ } while (0) |
|
+ |
|
+static gf_ctr_local_t * |
|
+init_ctr_local_t(xlator_t *this) |
|
+{ |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ |
|
+ ctr_local = mem_get0(this->local_pool); |
|
+ if (!ctr_local) { |
|
+ gf_msg(GFDB_DATA_STORE, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, |
|
+ "Error while creating ctr local"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ CLEAR_CTR_DB_RECORD(ctr_local); |
|
+out: |
|
+ return ctr_local; |
|
+} |
|
+ |
|
+static void |
|
+free_ctr_local(gf_ctr_local_t *ctr_local) |
|
+{ |
|
+ if (ctr_local) |
|
+ mem_put(ctr_local); |
|
+} |
|
+ |
|
+/****************************************************************************** |
|
+ * |
|
+ * |
|
+ * Context Carrier Structures |
|
+ * |
|
+ * |
|
+ * ****************************************************************************/ |
|
+ |
|
+/* |
|
+ * Context Carrier structures are used to carry relevant information about |
|
+ * inodes and links from the fops calls to the ctr_insert_wind. |
|
+ * These structure just have pointers to the original data and donot |
|
+ * do a deep copy of any data. This info is deep copied to |
|
+ * ctr_local->gfdb_db_record and passed to insert_record() api of libgfdb. This |
|
+ * info remains persistent for the unwind in ctr_local->gfdb_db_record |
|
+ * and once used will be destroyed. |
|
+ * |
|
+ * gf_ctr_link_context_t : Context structure for hard links |
|
+ * gf_ctr_inode_context_t : Context structure for inodes |
|
+ * |
|
+ * */ |
|
+ |
|
+/*Context Carrier Structure for hard links*/ |
|
+typedef struct gf_ctr_link_context { |
|
+ uuid_t *pargfid; |
|
+ const char *basename; |
|
+} gf_ctr_link_context_t; |
|
+ |
|
+/*Context Carrier Structure for inodes*/ |
|
+typedef struct gf_ctr_inode_context { |
|
+ ia_type_t ia_type; |
|
+ uuid_t *gfid; |
|
+ uuid_t *old_gfid; |
|
+ gf_ctr_link_context_t *new_link_cx; |
|
+ gf_ctr_link_context_t *old_link_cx; |
|
+ gfdb_fop_type_t fop_type; |
|
+ gfdb_fop_path_t fop_path; |
|
+ gf_boolean_t is_internal_fop; |
|
+ /* Indicating metadata fops */ |
|
+ gf_boolean_t is_metadata_fop; |
|
+} gf_ctr_inode_context_t; |
|
+ |
|
+/*******************Util Macros for Context Carrier Structures*****************/ |
|
+ |
|
+/*Checks if ctr_link_cx is sane!*/ |
|
+#define IS_CTR_LINK_CX_SANE(ctr_link_cx) \ |
|
+ do { \ |
|
+ if (ctr_link_cx) { \ |
|
+ if (ctr_link_cx->pargfid) \ |
|
+ GF_ASSERT(*(ctr_link_cx->pargfid)); \ |
|
+ GF_ASSERT(ctr_link_cx->basename); \ |
|
+ }; \ |
|
+ } while (0) |
|
+ |
|
+/*Clear and fill the ctr_link_context with values*/ |
|
+#define FILL_CTR_LINK_CX(ctr_link_cx, _pargfid, _basename, label) \ |
|
+ do { \ |
|
+ GF_VALIDATE_OR_GOTO("ctr", ctr_link_cx, label); \ |
|
+ GF_VALIDATE_OR_GOTO("ctr", _pargfid, label); \ |
|
+ GF_VALIDATE_OR_GOTO("ctr", _basename, label); \ |
|
+ memset(ctr_link_cx, 0, sizeof(*ctr_link_cx)); \ |
|
+ ctr_link_cx->pargfid = &_pargfid; \ |
|
+ ctr_link_cx->basename = _basename; \ |
|
+ } while (0) |
|
+ |
|
+#define NEW_LINK_CX(ctr_inode_cx) ctr_inode_cx->new_link_cx |
|
+ |
|
+#define OLD_LINK_CX(ctr_inode_cx) ctr_inode_cx->old_link_cx |
|
+ |
|
+/*Checks if ctr_inode_cx is sane!*/ |
|
+#define IS_CTR_INODE_CX_SANE(ctr_inode_cx) \ |
|
+ do { \ |
|
+ GF_ASSERT(ctr_inode_cx); \ |
|
+ GF_ASSERT(ctr_inode_cx->gfid); \ |
|
+ GF_ASSERT(*(ctr_inode_cx->gfid)); \ |
|
+ GF_ASSERT(ctr_inode_cx->fop_type != GFDB_FOP_INVALID_OP); \ |
|
+ GF_ASSERT(ctr_inode_cx->fop_path != GFDB_FOP_INVALID); \ |
|
+ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \ |
|
+ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \ |
|
+ } while (0) |
|
+ |
|
+/*Clear and fill the ctr_inode_context with values*/ |
|
+#define FILL_CTR_INODE_CONTEXT(ctr_inode_cx, _ia_type, _gfid, _new_link_cx, \ |
|
+ _old_link_cx, _fop_type, _fop_path) \ |
|
+ do { \ |
|
+ GF_ASSERT(ctr_inode_cx); \ |
|
+ GF_ASSERT(_gfid); \ |
|
+ GF_ASSERT(_fop_type != GFDB_FOP_INVALID_OP); \ |
|
+ GF_ASSERT(_fop_path != GFDB_FOP_INVALID); \ |
|
+ memset(ctr_inode_cx, 0, sizeof(*ctr_inode_cx)); \ |
|
+ ctr_inode_cx->ia_type = _ia_type; \ |
|
+ ctr_inode_cx->gfid = &_gfid; \ |
|
+ IS_CTR_LINK_CX_SANE(NEW_LINK_CX(ctr_inode_cx)); \ |
|
+ if (_new_link_cx) \ |
|
+ NEW_LINK_CX(ctr_inode_cx) = _new_link_cx; \ |
|
+ IS_CTR_LINK_CX_SANE(OLD_LINK_CX(ctr_inode_cx)); \ |
|
+ if (_old_link_cx) \ |
|
+ OLD_LINK_CX(ctr_inode_cx) = _old_link_cx; \ |
|
+ ctr_inode_cx->fop_type = _fop_type; \ |
|
+ ctr_inode_cx->fop_path = _fop_path; \ |
|
+ } while (0) |
|
+ |
|
+/****************************************************************************** |
|
+ * |
|
+ * Util functions or macros used by |
|
+ * insert wind and insert unwind |
|
+ * |
|
+ * ****************************************************************************/ |
|
+/* Free ctr frame local */ |
|
+static inline void |
|
+ctr_free_frame_local(call_frame_t *frame) |
|
+{ |
|
+ if (frame) { |
|
+ free_ctr_local((gf_ctr_local_t *)frame->local); |
|
+ frame->local = NULL; |
|
+ } |
|
+} |
|
+ |
|
+/* Setting GF_REQUEST_LINK_COUNT_XDATA in dict |
|
+ * that has to be sent to POSIX Xlator to send |
|
+ * link count in unwind path. |
|
+ * return 0 for success with not creation of dict |
|
+ * return 1 for success with creation of dict |
|
+ * return -1 for failure. |
|
+ * */ |
|
+static inline int |
|
+set_posix_link_request(xlator_t *this, dict_t **xdata) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_boolean_t is_created = _gf_false; |
|
+ |
|
+ GF_VALIDATE_OR_GOTO("ctr", this, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, xdata, out); |
|
+ |
|
+ /*create xdata if NULL*/ |
|
+ if (!*xdata) { |
|
+ *xdata = dict_new(); |
|
+ is_created = _gf_true; |
|
+ ret = 1; |
|
+ } else { |
|
+ ret = 0; |
|
+ } |
|
+ |
|
+ if (!*xdata) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_XDATA_NULL, |
|
+ "xdata is NULL :Cannot send " |
|
+ "GF_REQUEST_LINK_COUNT_XDATA to posix"); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = dict_set_int32(*xdata, GF_REQUEST_LINK_COUNT_XDATA, 1); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ "Failed setting GF_REQUEST_LINK_COUNT_XDATA"); |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ if (ret == -1) { |
|
+ if (*xdata && is_created) { |
|
+ dict_unref(*xdata); |
|
+ } |
|
+ } |
|
+ return ret; |
|
+} |
|
+ |
|
+/* |
|
+ * If a bitrot fop |
|
+ * */ |
|
+#define BITROT_FOP(frame) \ |
|
+ (frame->root->pid == GF_CLIENT_PID_BITD || \ |
|
+ frame->root->pid == GF_CLIENT_PID_SCRUB) |
|
+ |
|
+/* |
|
+ * If a rebalancer fop |
|
+ * */ |
|
+#define REBALANCE_FOP(frame) (frame->root->pid == GF_CLIENT_PID_DEFRAG) |
|
+ |
|
+/* |
|
+ * If its a tiering rebalancer fop |
|
+ * */ |
|
+#define TIER_REBALANCE_FOP(frame) \ |
|
+ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) |
|
+ |
|
+/* |
|
+ * If its a AFR SELF HEAL |
|
+ * */ |
|
+#define AFR_SELF_HEAL_FOP(frame) (frame->root->pid == GF_CLIENT_PID_SELF_HEALD) |
|
+ |
|
+/* |
|
+ * if a rebalancer fop goto |
|
+ * */ |
|
+#define CTR_IF_REBALANCE_FOP_THEN_GOTO(frame, label) \ |
|
+ do { \ |
|
+ if (REBALANCE_FOP(frame)) \ |
|
+ goto label; \ |
|
+ } while (0) |
|
+ |
|
+/* |
|
+ * Internal fop |
|
+ * |
|
+ * */ |
|
+static inline gf_boolean_t |
|
+is_internal_fop(call_frame_t *frame, dict_t *xdata) |
|
+{ |
|
+ gf_boolean_t ret = _gf_false; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ |
|
+ if (AFR_SELF_HEAL_FOP(frame)) { |
|
+ ret = _gf_true; |
|
+ } |
|
+ if (BITROT_FOP(frame)) { |
|
+ ret = _gf_true; |
|
+ } |
|
+ if (REBALANCE_FOP(frame) || TIER_REBALANCE_FOP(frame)) { |
|
+ ret = _gf_true; |
|
+ if (xdata && dict_get(xdata, CTR_ATTACH_TIER_LOOKUP)) { |
|
+ ret = _gf_false; |
|
+ } |
|
+ } |
|
+ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { |
|
+ ret = _gf_true; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+#define CTR_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \ |
|
+ do { \ |
|
+ if (is_internal_fop(frame, dict)) \ |
|
+ goto label; \ |
|
+ } while (0) |
|
+ |
|
+/* if fop has failed exit */ |
|
+#define CTR_IF_FOP_FAILED_THEN_GOTO(this, op_ret, op_errno, label) \ |
|
+ do { \ |
|
+ if (op_ret == -1) { \ |
|
+ gf_msg_trace(this->name, 0, "Failed fop with %s", \ |
|
+ strerror(op_errno)); \ |
|
+ goto label; \ |
|
+ }; \ |
|
+ } while (0) |
|
+ |
|
+/* |
|
+ * IS CTR Xlator is disabled then goto to label |
|
+ * */ |
|
+#define CTR_IS_DISABLED_THEN_GOTO(this, label) \ |
|
+ do { \ |
|
+ gf_ctr_private_t *_priv = NULL; \ |
|
+ GF_ASSERT(this); \ |
|
+ GF_ASSERT(this->private); \ |
|
+ _priv = this->private; \ |
|
+ if (!_priv->_db_conn) \ |
|
+ goto label; \ |
|
+ } while (0) |
|
+ |
|
+/* |
|
+ * IS CTR record metadata heat is disabled then goto to label |
|
+ * */ |
|
+#define CTR_RECORD_METADATA_HEAT_IS_DISABLED_THEN_GOTO(this, label) \ |
|
+ do { \ |
|
+ gf_ctr_private_t *_priv = NULL; \ |
|
+ GF_ASSERT(this); \ |
|
+ GF_ASSERT(this->private); \ |
|
+ _priv = this->private; \ |
|
+ if (!_priv->ctr_record_metadata_heat) \ |
|
+ goto label; \ |
|
+ } while (0) |
|
+ |
|
+int |
|
+fill_db_record_for_unwind(xlator_t *this, gf_ctr_local_t *ctr_local, |
|
+ gfdb_fop_type_t fop_type, gfdb_fop_path_t fop_path); |
|
+ |
|
+int |
|
+fill_db_record_for_wind(xlator_t *this, gf_ctr_local_t *ctr_local, |
|
+ gf_ctr_inode_context_t *ctr_inode_cx); |
|
+ |
|
+/******************************************************************************* |
|
+ * CTR INSERT WIND |
|
+ * ***************************************************************************** |
|
+ * Function used to insert/update record into the database during a wind fop |
|
+ * This function creates ctr_local structure into the frame of the fop |
|
+ * call. |
|
+ * ****************************************************************************/ |
|
+ |
|
+static inline int |
|
+ctr_insert_wind(call_frame_t *frame, xlator_t *this, |
|
+ gf_ctr_inode_context_t *ctr_inode_cx) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(frame->root); |
|
+ GF_ASSERT(this); |
|
+ IS_CTR_INODE_CX_SANE(ctr_inode_cx); |
|
+ |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ GF_ASSERT(_priv->_db_conn); |
|
+ |
|
+ /*If record_wind option of CTR is on record wind for |
|
+ * regular files only*/ |
|
+ if (_priv->ctr_record_wind && ctr_inode_cx->ia_type != IA_IFDIR) { |
|
+ frame->local = init_ctr_local_t(this); |
|
+ if (!frame->local) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, |
|
+ "WIND: Error while creating ctr local"); |
|
+ goto out; |
|
+ }; |
|
+ ctr_local = frame->local; |
|
+ ctr_local->client_pid = frame->root->pid; |
|
+ ctr_local->is_internal_fop = ctr_inode_cx->is_internal_fop; |
|
+ |
|
+ /* Decide whether to record counters or not */ |
|
+ CTR_DB_REC(ctr_local).do_record_counters = _gf_false; |
|
+ /* If record counter is enabled */ |
|
+ if (_priv->ctr_record_counter) { |
|
+ /* If not a internal fop */ |
|
+ if (!(ctr_local->is_internal_fop)) { |
|
+ /* If its a metadata fop AND |
|
+ * record metadata heat |
|
+ * OR |
|
+ * its NOT a metadata fop */ |
|
+ if ((ctr_inode_cx->is_metadata_fop && |
|
+ _priv->ctr_record_metadata_heat) || |
|
+ (!ctr_inode_cx->is_metadata_fop)) { |
|
+ CTR_DB_REC(ctr_local).do_record_counters = _gf_true; |
|
+ } |
|
+ } |
|
+ } |
|
+ |
|
+ /* Decide whether to record times or not |
|
+ * For non internal FOPS record times as usual*/ |
|
+ CTR_DB_REC(ctr_local).do_record_times = _gf_false; |
|
+ if (!ctr_local->is_internal_fop) { |
|
+ /* If its a metadata fop AND |
|
+ * record metadata heat |
|
+ * OR |
|
+ * its NOT a metadata fop */ |
|
+ if ((ctr_inode_cx->is_metadata_fop && |
|
+ _priv->ctr_record_metadata_heat) || |
|
+ (!ctr_inode_cx->is_metadata_fop)) { |
|
+ CTR_DB_REC(ctr_local).do_record_times = |
|
+ (_priv->ctr_record_wind || _priv->ctr_record_unwind); |
|
+ } |
|
+ } |
|
+ /* when its a internal FOPS*/ |
|
+ else { |
|
+ /* Record times only for create |
|
+ * i.e when the inode is created */ |
|
+ CTR_DB_REC(ctr_local).do_record_times = (isdentrycreatefop( |
|
+ ctr_inode_cx->fop_type)) |
|
+ ? _gf_true |
|
+ : _gf_false; |
|
+ } |
|
+ |
|
+ /*Fill the db record for insertion*/ |
|
+ ret = fill_db_record_for_wind(this, ctr_local, ctr_inode_cx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND, |
|
+ "WIND: Error filling ctr local"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Insert the db record*/ |
|
+ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_INSERT_RECORD_WIND_FAILED, |
|
+ "WIND: Inserting of record failed!"); |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ |
|
+ if (ret) { |
|
+ free_ctr_local(ctr_local); |
|
+ frame->local = NULL; |
|
+ } |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/******************************************************************************* |
|
+ * CTR INSERT UNWIND |
|
+ * ***************************************************************************** |
|
+ * Function used to insert/update record into the database during a unwind fop |
|
+ * This function destroys ctr_local structure into the frame of the fop |
|
+ * call at the end. |
|
+ * ****************************************************************************/ |
|
+static inline int |
|
+ctr_insert_unwind(call_frame_t *frame, xlator_t *this, gfdb_fop_type_t fop_type, |
|
+ gfdb_fop_path_t fop_path) |
|
+{ |
|
+ int ret = -1; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ |
|
+ _priv = this->private; |
|
+ GF_ASSERT(_priv); |
|
+ |
|
+ GF_ASSERT(_priv->_db_conn); |
|
+ |
|
+ ctr_local = frame->local; |
|
+ |
|
+ if (ctr_local && (_priv->ctr_record_unwind || isdentryfop(fop_type)) && |
|
+ (ctr_local->ia_inode_type != IA_IFDIR)) { |
|
+ CTR_DB_REC(ctr_local).do_record_uwind_time = _priv->ctr_record_unwind; |
|
+ |
|
+ ret = fill_db_record_for_unwind(this, ctr_local, fop_type, fop_path); |
|
+ if (ret == -1) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, |
|
+ "UNWIND: Error filling ctr local"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = insert_record(_priv->_db_conn, &ctr_local->gfdb_db_record); |
|
+ if (ret == -1) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, |
|
+ "UNWIND: Error filling ctr local"); |
|
+ goto out; |
|
+ } |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/****************************************************************************** |
|
+ * Delete file/flink record/s from db |
|
+ * ****************************************************************************/ |
|
+static inline int |
|
+ctr_delete_hard_link_from_db(xlator_t *this, uuid_t gfid, uuid_t pargfid, |
|
+ char *basename, gfdb_fop_type_t fop_type, |
|
+ gfdb_fop_path_t fop_path) |
|
+{ |
|
+ int ret = -1; |
|
+ gfdb_db_record_t gfdb_db_record; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ |
|
+ _priv = this->private; |
|
+ GF_VALIDATE_OR_GOTO(this->name, _priv, out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(gfid)), out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, (!gf_uuid_is_null(pargfid)), out); |
|
+ GF_VALIDATE_OR_GOTO(this->name, (fop_type == GFDB_FOP_DENTRY_WRITE), out); |
|
+ GF_VALIDATE_OR_GOTO( |
|
+ this->name, (fop_path == GFDB_FOP_UNDEL || GFDB_FOP_UNDEL_ALL), out); |
|
+ |
|
+ /* Set gfdb_db_record to 0 */ |
|
+ memset(&gfdb_db_record, 0, sizeof(gfdb_db_record)); |
|
+ |
|
+ /* Copy basename */ |
|
+ if (snprintf(gfdb_db_record.file_name, GF_NAME_MAX, "%s", basename) >= |
|
+ GF_NAME_MAX) |
|
+ goto out; |
|
+ |
|
+ /* Copy gfid into db record */ |
|
+ gf_uuid_copy(gfdb_db_record.gfid, gfid); |
|
+ |
|
+ /* Copy pargid into db record */ |
|
+ gf_uuid_copy(gfdb_db_record.pargfid, pargfid); |
|
+ |
|
+ gfdb_db_record.gfdb_fop_path = fop_path; |
|
+ gfdb_db_record.gfdb_fop_type = fop_type; |
|
+ |
|
+ /*send delete request to db*/ |
|
+ ret = insert_record(_priv->_db_conn, &gfdb_db_record); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_INSERT_RECORD_WIND_FAILED, |
|
+ "Failed to delete record. %s", basename); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/******************************* Hard link function ***************************/ |
|
+ |
|
+static inline gf_boolean_t |
|
+__is_inode_expired(ctr_xlator_ctx_t *ctr_xlator_ctx, gf_ctr_private_t *_priv, |
|
+ gfdb_time_t *current_time) |
|
+{ |
|
+ gf_boolean_t ret = _gf_false; |
|
+ uint64_t time_diff = 0; |
|
+ |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ GF_ASSERT(_priv); |
|
+ GF_ASSERT(current_time); |
|
+ |
|
+ time_diff = current_time->tv_sec - ctr_xlator_ctx->inode_heal_period; |
|
+ |
|
+ ret = (time_diff >= _priv->ctr_lookupheal_inode_timeout) ? _gf_true |
|
+ : _gf_false; |
|
+ return ret; |
|
+} |
|
+ |
|
+static inline gf_boolean_t |
|
+__is_hardlink_expired(ctr_hard_link_t *ctr_hard_link, gf_ctr_private_t *_priv, |
|
+ gfdb_time_t *current_time) |
|
+{ |
|
+ gf_boolean_t ret = _gf_false; |
|
+ uint64_t time_diff = 0; |
|
+ |
|
+ GF_ASSERT(ctr_hard_link); |
|
+ GF_ASSERT(_priv); |
|
+ GF_ASSERT(current_time); |
|
+ |
|
+ time_diff = current_time->tv_sec - ctr_hard_link->hardlink_heal_period; |
|
+ |
|
+ ret = ret || (time_diff >= _priv->ctr_lookupheal_link_timeout) ? _gf_true |
|
+ : _gf_false; |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* Return values of heal*/ |
|
+typedef enum ctr_heal_ret_val { |
|
+ CTR_CTX_ERROR = -1, |
|
+ /* No healing required */ |
|
+ CTR_TRY_NO_HEAL = 0, |
|
+ /* Try healing hard link */ |
|
+ CTR_TRY_HARDLINK_HEAL = 1, |
|
+ /* Try healing inode */ |
|
+ CTR_TRY_INODE_HEAL = 2, |
|
+} ctr_heal_ret_val_t; |
|
+ |
|
+/** |
|
+ * @brief Function to add hard link to the inode context variable. |
|
+ * The inode context maintainences a in-memory list. This is used |
|
+ * smart healing of database. |
|
+ * @param frame of the FOP |
|
+ * @param this is the Xlator instant |
|
+ * @param inode |
|
+ * @return Return ctr_heal_ret_val_t |
|
+ */ |
|
+ |
|
+static inline ctr_heal_ret_val_t |
|
+add_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode) |
|
+{ |
|
+ ctr_heal_ret_val_t ret_val = CTR_TRY_NO_HEAL; |
|
+ int ret = -1; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ ctr_hard_link_t *ctr_hard_link = NULL; |
|
+ gf_ctr_private_t *_priv = NULL; |
|
+ gfdb_time_t current_time = {0}; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(inode); |
|
+ GF_ASSERT(this->private); |
|
+ |
|
+ _priv = this->private; |
|
+ |
|
+ ctr_local = frame->local; |
|
+ if (!ctr_local) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode); |
|
+ if (!ctr_xlator_ctx) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, |
|
+ "Failed accessing ctr inode context"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ LOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ /* Check if the hard link already exists |
|
+ * in the ctr inode context*/ |
|
+ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, |
|
+ CTR_DB_REC(ctr_local).pargfid, |
|
+ CTR_DB_REC(ctr_local).file_name); |
|
+ /* if there then ignore */ |
|
+ if (ctr_hard_link) { |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time"); |
|
+ ret_val = CTR_CTX_ERROR; |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ if (__is_hardlink_expired(ctr_hard_link, _priv, ¤t_time)) { |
|
+ ctr_hard_link->hardlink_heal_period = current_time.tv_sec; |
|
+ ret_val = ret_val | CTR_TRY_HARDLINK_HEAL; |
|
+ } |
|
+ |
|
+ if (__is_inode_expired(ctr_xlator_ctx, _priv, ¤t_time)) { |
|
+ ctr_xlator_ctx->inode_heal_period = current_time.tv_sec; |
|
+ ret_val = ret_val | CTR_TRY_INODE_HEAL; |
|
+ } |
|
+ |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ /* Add the hard link to the list*/ |
|
+ ret = ctr_add_hard_link(this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid, |
|
+ CTR_DB_REC(ctr_local).file_name); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, |
|
+ "Failed to add hardlink to the ctr inode context"); |
|
+ ret_val = CTR_CTX_ERROR; |
|
+ goto unlock; |
|
+ } |
|
+ |
|
+ ret_val = CTR_TRY_NO_HEAL; |
|
+unlock: |
|
+ UNLOCK(&ctr_xlator_ctx->lock); |
|
+out: |
|
+ return ret_val; |
|
+} |
|
+ |
|
+static inline int |
|
+delete_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(inode); |
|
+ |
|
+ ctr_local = frame->local; |
|
+ if (!ctr_local) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_xlator_ctx = get_ctr_xlator_ctx(this, inode); |
|
+ if (!ctr_xlator_ctx) { |
|
+ /* Since there is no ctr inode context so nothing more to do */ |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = ctr_delete_hard_link(this, ctr_xlator_ctx, |
|
+ CTR_DB_REC(ctr_local).pargfid, |
|
+ CTR_DB_REC(ctr_local).file_name); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, |
|
+ "Failed to delete hard link"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+static inline int |
|
+update_hard_link_ctx(call_frame_t *frame, xlator_t *this, inode_t *inode) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ gf_ctr_local_t *ctr_local = NULL; |
|
+ |
|
+ GF_ASSERT(frame); |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(inode); |
|
+ |
|
+ ctr_local = frame->local; |
|
+ if (!ctr_local) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_xlator_ctx = init_ctr_xlator_ctx(this, inode); |
|
+ if (!ctr_xlator_ctx) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, |
|
+ "Failed accessing ctr inode context"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = ctr_update_hard_link( |
|
+ this, ctr_xlator_ctx, CTR_DB_REC(ctr_local).pargfid, |
|
+ CTR_DB_REC(ctr_local).file_name, CTR_DB_REC(ctr_local).old_pargfid, |
|
+ CTR_DB_REC(ctr_local).old_file_name); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_DELETE_HARDLINK_FAILED, |
|
+ "Failed to delete hard link"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+/****************************************************************************** |
|
+ * |
|
+ * CTR xlator init related functions |
|
+ * |
|
+ * |
|
+ * ****************************************************************************/ |
|
+int |
|
+extract_db_params(xlator_t *this, dict_t *params_dict, gfdb_db_type_t db_type); |
|
+ |
|
+int |
|
+extract_ctr_options(xlator_t *this, gf_ctr_private_t *_priv); |
|
+ |
|
+#endif |
|
diff --git a/xlators/features/changetimerecorder/src/ctr-messages.h b/xlators/features/changetimerecorder/src/ctr-messages.h |
|
new file mode 100644 |
|
index 0000000..23adf0a |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr-messages.h |
|
@@ -0,0 +1,61 @@ |
|
+/* |
|
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+ */ |
|
+ |
|
+#ifndef _CTR_MESSAGES_H_ |
|
+#define _CTR_MESSAGES_H_ |
|
+ |
|
+#include <glusterfs/glfs-message-id.h> |
|
+ |
|
+/* To add new message IDs, append new identifiers at the end of the list. |
|
+ * |
|
+ * Never remove a message ID. If it's not used anymore, you can rename it or |
|
+ * leave it as it is, but not delete it. This is to prevent reutilization of |
|
+ * IDs by other messages. |
|
+ * |
|
+ * The component name must match one of the entries defined in |
|
+ * glfs-message-id.h. |
|
+ */ |
|
+ |
|
+GLFS_MSGID( |
|
+ CTR, CTR_MSG_CREATE_CTR_LOCAL_ERROR_WIND, |
|
+ CTR_MSG_FILL_CTR_LOCAL_ERROR_UNWIND, CTR_MSG_FILL_CTR_LOCAL_ERROR_WIND, |
|
+ CTR_MSG_INSERT_LINK_WIND_FAILED, CTR_MSG_INSERT_WRITEV_WIND_FAILED, |
|
+ CTR_MSG_INSERT_WRITEV_UNWIND_FAILED, CTR_MSG_INSERT_SETATTR_WIND_FAILED, |
|
+ CTR_MSG_INSERT_SETATTR_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_FREMOVEXATTR_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_FREMOVEXATTR_WIND_FAILED, |
|
+ CTR_MSG_INSERT_REMOVEXATTR_WIND_FAILED, |
|
+ CTR_MSG_INSERT_REMOVEXATTR_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_TRUNCATE_WIND_FAILED, CTR_MSG_INSERT_TRUNCATE_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_FTRUNCATE_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_FTRUNCATE_WIND_FAILED, CTR_MSG_INSERT_RENAME_WIND_FAILED, |
|
+ CTR_MSG_INSERT_RENAME_UNWIND_FAILED, |
|
+ CTR_MSG_ACCESS_CTR_INODE_CONTEXT_FAILED, CTR_MSG_ADD_HARDLINK_FAILED, |
|
+ CTR_MSG_DELETE_HARDLINK_FAILED, CTR_MSG_UPDATE_HARDLINK_FAILED, |
|
+ CTR_MSG_GET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ CTR_MSG_SET_CTR_RESPONSE_LINK_COUNT_XDATA_FAILED, |
|
+ CTR_MSG_INSERT_UNLINK_UNWIND_FAILED, CTR_MSG_INSERT_UNLINK_WIND_FAILED, |
|
+ CTR_MSG_XDATA_NULL, CTR_MSG_INSERT_FSYNC_WIND_FAILED, |
|
+ CTR_MSG_INSERT_FSYNC_UNWIND_FAILED, CTR_MSG_INSERT_MKNOD_UNWIND_FAILED, |
|
+ CTR_MSG_INSERT_MKNOD_WIND_FAILED, CTR_MSG_INSERT_CREATE_WIND_FAILED, |
|
+ CTR_MSG_INSERT_CREATE_UNWIND_FAILED, CTR_MSG_INSERT_RECORD_WIND_FAILED, |
|
+ CTR_MSG_INSERT_READV_WIND_FAILED, CTR_MSG_GET_GFID_FROM_DICT_FAILED, |
|
+ CTR_MSG_SET, CTR_MSG_FATAL_ERROR, CTR_MSG_DANGLING_VOLUME, |
|
+ CTR_MSG_CALLOC_FAILED, CTR_MSG_EXTRACT_CTR_XLATOR_OPTIONS_FAILED, |
|
+ CTR_MSG_INIT_DB_PARAMS_FAILED, CTR_MSG_CREATE_LOCAL_MEMORY_POOL_FAILED, |
|
+ CTR_MSG_MEM_ACC_INIT_FAILED, CTR_MSG_CLOSE_DB_CONN_FAILED, |
|
+ CTR_MSG_FILL_UNWIND_TIME_REC_ERROR, CTR_MSG_WRONG_FOP_PATH, |
|
+ CTR_MSG_CONSTRUCT_DB_PATH_FAILED, CTR_MSG_SET_VALUE_TO_SQL_PARAM_FAILED, |
|
+ CTR_MSG_XLATOR_DISABLED, CTR_MSG_HARDLINK_MISSING_IN_LIST, |
|
+ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, CTR_MSG_INIT_LOCK_FAILED, |
|
+ CTR_MSG_COPY_FAILED, CTR_MSG_EXTRACT_DB_PARAM_OPTIONS_FAILED, |
|
+ CTR_MSG_ADD_HARDLINK_TO_CTR_INODE_CONTEXT_FAILED, CTR_MSG_NULL_LOCAL); |
|
+ |
|
+#endif /* !_CTR_MESSAGES_H_ */ |
|
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c |
|
new file mode 100644 |
|
index 0000000..b6b66d5 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.c |
|
@@ -0,0 +1,362 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#include "ctr-xlator-ctx.h" |
|
+#include "ctr-messages.h" |
|
+#include <time.h> |
|
+#include <sys/time.h> |
|
+ |
|
+#define IS_THE_ONLY_HARDLINK(ctr_hard_link) \ |
|
+ (ctr_hard_link->list.next == ctr_hard_link->list.prev) |
|
+ |
|
+static void |
|
+fini_ctr_hard_link(ctr_hard_link_t **ctr_hard_link) |
|
+{ |
|
+ GF_ASSERT(ctr_hard_link); |
|
+ |
|
+ if (*ctr_hard_link) |
|
+ return; |
|
+ GF_FREE((*ctr_hard_link)->base_name); |
|
+ GF_FREE(*ctr_hard_link); |
|
+ *ctr_hard_link = NULL; |
|
+} |
|
+ |
|
+/* Please lock the ctr_xlator_ctx before using this function */ |
|
+ctr_hard_link_t * |
|
+ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name) |
|
+{ |
|
+ ctr_hard_link_t *_hard_link = NULL; |
|
+ ctr_hard_link_t *searched_hardlink = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ |
|
+ if (pgfid == NULL || base_name == NULL) |
|
+ goto out; |
|
+ |
|
+ /*linear search*/ |
|
+ list_for_each_entry(_hard_link, &ctr_xlator_ctx->hardlink_list, list) |
|
+ { |
|
+ if (gf_uuid_compare(_hard_link->pgfid, pgfid) == 0 && |
|
+ _hard_link->base_name && |
|
+ strcmp(_hard_link->base_name, base_name) == 0) { |
|
+ searched_hardlink = _hard_link; |
|
+ break; |
|
+ } |
|
+ } |
|
+ |
|
+out: |
|
+ return searched_hardlink; |
|
+} |
|
+ |
|
+/* Please lock the ctr_xlator_ctx before using this function */ |
|
+int |
|
+ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_hard_link_t *ctr_hard_link = NULL; |
|
+ struct timeval current_time = {0}; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ |
|
+ if (pgfid == NULL || base_name == NULL) |
|
+ goto out; |
|
+ |
|
+ ctr_hard_link = GF_CALLOC(1, sizeof(*ctr_hard_link), gf_ctr_mt_hard_link_t); |
|
+ if (!ctr_hard_link) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, CTR_MSG_CALLOC_FAILED, |
|
+ "Failed allocating " |
|
+ "ctr_hard_link"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /*Initialize the ctr_hard_link object and |
|
+ * Assign the values : parent GFID and basename*/ |
|
+ INIT_LIST_HEAD(&ctr_hard_link->list); |
|
+ gf_uuid_copy(ctr_hard_link->pgfid, pgfid); |
|
+ ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name); |
|
+ if (ret < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED, |
|
+ "Failed copying basename" |
|
+ "to ctr_hard_link"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time"); |
|
+ goto error; |
|
+ } |
|
+ |
|
+ /*Add the hard link to the list*/ |
|
+ list_add_tail(&ctr_hard_link->list, &ctr_xlator_ctx->hardlink_list); |
|
+ |
|
+ ctr_hard_link->hardlink_heal_period = current_time.tv_sec; |
|
+ |
|
+ /*aal izz well!*/ |
|
+ ret = 0; |
|
+ goto out; |
|
+error: |
|
+ GF_FREE(ctr_hard_link); |
|
+out: |
|
+ return ret; |
|
+} |
|
+ |
|
+static void |
|
+__delete_hard_link_from_list(ctr_hard_link_t **ctr_hard_link) |
|
+{ |
|
+ GF_ASSERT(ctr_hard_link); |
|
+ GF_ASSERT(*ctr_hard_link); |
|
+ |
|
+ /*Remove hard link from list*/ |
|
+ list_del(&(*ctr_hard_link)->list); |
|
+ fini_ctr_hard_link(ctr_hard_link); |
|
+} |
|
+ |
|
+int |
|
+ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_hard_link_t *ctr_hard_link = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ |
|
+ LOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ /*Check if the hard link is present */ |
|
+ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, pgfid, |
|
+ base_name); |
|
+ if (!ctr_hard_link) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_HARDLINK_MISSING_IN_LIST, |
|
+ "Hard link doesn't exist in the list"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ __delete_hard_link_from_list(&ctr_hard_link); |
|
+ ctr_hard_link = NULL; |
|
+ |
|
+ ret = 0; |
|
+out: |
|
+ UNLOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+int |
|
+ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name, uuid_t old_pgfid, |
|
+ const char *old_base_name) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_hard_link_t *ctr_hard_link = NULL; |
|
+ struct timeval current_time = {0}; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ |
|
+ LOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ /*Check if the hard link is present */ |
|
+ ctr_hard_link = ctr_search_hard_link_ctx(this, ctr_xlator_ctx, old_pgfid, |
|
+ old_base_name); |
|
+ if (!ctr_hard_link) { |
|
+ gf_msg_trace(this->name, 0, |
|
+ "Hard link doesn't exist" |
|
+ " in the list"); |
|
+ /* Since the hard link is not present in the list |
|
+ * we add it to the list */ |
|
+ ret = ctr_add_hard_link(this, ctr_xlator_ctx, pgfid, base_name); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, |
|
+ CTR_MSG_ADD_HARDLINK_TO_LIST_FAILED, |
|
+ "Failed adding hard link to the list"); |
|
+ goto out; |
|
+ } |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ /* update the hard link */ |
|
+ gf_uuid_copy(ctr_hard_link->pgfid, pgfid); |
|
+ GF_FREE(ctr_hard_link->base_name); |
|
+ ret = gf_asprintf(&ctr_hard_link->base_name, "%s", base_name); |
|
+ if (ret < 0) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, 0, CTR_MSG_COPY_FAILED, |
|
+ "Failed copying basename" |
|
+ "to ctr_hard_link"); |
|
+ /* delete the corrupted entry */ |
|
+ __delete_hard_link_from_list(&ctr_hard_link); |
|
+ ctr_hard_link = NULL; |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time"); |
|
+ ctr_hard_link->hardlink_heal_period = 0; |
|
+ } else { |
|
+ ctr_hard_link->hardlink_heal_period = current_time.tv_sec; |
|
+ } |
|
+ |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ UNLOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* Delete all hardlinks */ |
|
+static int |
|
+ctr_delete_all_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx) |
|
+{ |
|
+ int ret = -1; |
|
+ ctr_hard_link_t *ctr_hard_link = NULL; |
|
+ ctr_hard_link_t *tmp = NULL; |
|
+ |
|
+ GF_ASSERT(ctr_xlator_ctx); |
|
+ |
|
+ LOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ list_for_each_entry_safe(ctr_hard_link, tmp, &ctr_xlator_ctx->hardlink_list, |
|
+ list) |
|
+ { |
|
+ /*Remove hard link from list*/ |
|
+ __delete_hard_link_from_list(&ctr_hard_link); |
|
+ ctr_hard_link = NULL; |
|
+ } |
|
+ |
|
+ UNLOCK(&ctr_xlator_ctx->lock); |
|
+ |
|
+ ret = 0; |
|
+ |
|
+ return ret; |
|
+} |
|
+ |
|
+/* Please lock the inode before using this function */ |
|
+static ctr_xlator_ctx_t * |
|
+__get_ctr_xlator_ctx(xlator_t *this, inode_t *inode) |
|
+{ |
|
+ int ret = 0; |
|
+ uint64_t _addr = 0; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(inode); |
|
+ |
|
+ ret = __inode_ctx_get(inode, this, &_addr); |
|
+ if (ret < 0) |
|
+ _addr = 0; |
|
+ if (_addr != 0) { |
|
+ ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr; |
|
+ } |
|
+ |
|
+ return ctr_xlator_ctx; |
|
+} |
|
+ |
|
+ctr_xlator_ctx_t * |
|
+init_ctr_xlator_ctx(xlator_t *this, inode_t *inode) |
|
+{ |
|
+ int ret = -1; |
|
+ uint64_t _addr = 0; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ struct timeval current_time = {0}; |
|
+ |
|
+ GF_ASSERT(this); |
|
+ GF_ASSERT(inode); |
|
+ |
|
+ LOCK(&inode->lock); |
|
+ { |
|
+ ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode); |
|
+ if (ctr_xlator_ctx) { |
|
+ ret = 0; |
|
+ goto out; |
|
+ } |
|
+ ctr_xlator_ctx = GF_CALLOC(1, sizeof(*ctr_xlator_ctx), |
|
+ gf_ctr_mt_xlator_ctx); |
|
+ if (!ctr_xlator_ctx) |
|
+ goto out; |
|
+ |
|
+ ret = LOCK_INIT(&ctr_xlator_ctx->lock); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_ERROR, ret, CTR_MSG_INIT_LOCK_FAILED, |
|
+ "Failed init lock %s", strerror(ret)); |
|
+ goto out; |
|
+ } |
|
+ _addr = (uint64_t)(uintptr_t)ctr_xlator_ctx; |
|
+ |
|
+ ret = __inode_ctx_set(inode, this, &_addr); |
|
+ if (ret) { |
|
+ goto out; |
|
+ } |
|
+ |
|
+ INIT_LIST_HEAD(&ctr_xlator_ctx->hardlink_list); |
|
+ |
|
+ ret = gettimeofday(¤t_time, NULL); |
|
+ if (ret == -1) { |
|
+ gf_log(this->name, GF_LOG_ERROR, "Failed to get current time"); |
|
+ goto out; |
|
+ } |
|
+ |
|
+ ctr_xlator_ctx->inode_heal_period = current_time.tv_sec; |
|
+ } |
|
+ ret = 0; |
|
+out: |
|
+ if (ret) { |
|
+ GF_FREE(ctr_xlator_ctx); |
|
+ ctr_xlator_ctx = NULL; |
|
+ } |
|
+ |
|
+ UNLOCK(&inode->lock); |
|
+ |
|
+ return ctr_xlator_ctx; |
|
+} |
|
+ |
|
+void |
|
+fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode) |
|
+{ |
|
+ int ret = 0; |
|
+ uint64_t _addr = 0; |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ |
|
+ inode_ctx_del(inode, this, &_addr); |
|
+ if (!_addr) |
|
+ return; |
|
+ |
|
+ ctr_xlator_ctx = (ctr_xlator_ctx_t *)(long)_addr; |
|
+ |
|
+ ret = ctr_delete_all_hard_link(this, ctr_xlator_ctx); |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, CTR_MSG_DELETE_HARDLINK_FAILED, |
|
+ "Failed deleting all " |
|
+ "hard links from inode context"); |
|
+ } |
|
+ |
|
+ LOCK_DESTROY(&ctr_xlator_ctx->lock); |
|
+ |
|
+ GF_FREE(ctr_xlator_ctx); |
|
+} |
|
+ |
|
+ctr_xlator_ctx_t * |
|
+get_ctr_xlator_ctx(xlator_t *this, inode_t *inode) |
|
+{ |
|
+ ctr_xlator_ctx_t *ctr_xlator_ctx = NULL; |
|
+ |
|
+ LOCK(&inode->lock); |
|
+ ctr_xlator_ctx = __get_ctr_xlator_ctx(this, inode); |
|
+ UNLOCK(&inode->lock); |
|
+ |
|
+ return ctr_xlator_ctx; |
|
+} |
|
diff --git a/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h |
|
new file mode 100644 |
|
index 0000000..4e3bf7e |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr-xlator-ctx.h |
|
@@ -0,0 +1,68 @@ |
|
+/* |
|
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef __CTR_XLATOR_CTX_H |
|
+#define __CTR_XLATOR_CTX_H |
|
+ |
|
+#include <glusterfs/xlator.h> |
|
+#include "ctr_mem_types.h" |
|
+#include <glusterfs/iatt.h> |
|
+#include <glusterfs/glusterfs.h> |
|
+#include <glusterfs/xlator.h> |
|
+#include <glusterfs/logging.h> |
|
+#include <glusterfs/locking.h> |
|
+#include <glusterfs/common-utils.h> |
|
+#include <time.h> |
|
+#include <sys/time.h> |
|
+ |
|
+typedef struct ctr_hard_link { |
|
+ uuid_t pgfid; |
|
+ char *base_name; |
|
+ /* Hardlink expiry : Defines the expiry period after which a |
|
+ * database heal is attempted. */ |
|
+ uint64_t hardlink_heal_period; |
|
+ struct list_head list; |
|
+} ctr_hard_link_t; |
|
+ |
|
+typedef struct ctr_xlator_ctx { |
|
+ /* This represents the looked up hardlinks |
|
+ * NOTE: This doesn't represent all physical hardlinks of the inode*/ |
|
+ struct list_head hardlink_list; |
|
+ uint64_t inode_heal_period; |
|
+ gf_lock_t lock; |
|
+} ctr_xlator_ctx_t; |
|
+ |
|
+ctr_hard_link_t * |
|
+ctr_search_hard_link_ctx(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name); |
|
+ |
|
+int |
|
+ctr_add_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name); |
|
+ |
|
+int |
|
+ctr_delete_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name); |
|
+ |
|
+int |
|
+ctr_update_hard_link(xlator_t *this, ctr_xlator_ctx_t *ctr_xlator_ctx, |
|
+ uuid_t pgfid, const char *base_name, uuid_t old_pgfid, |
|
+ const char *old_base_name); |
|
+ |
|
+ctr_xlator_ctx_t * |
|
+get_ctr_xlator_ctx(xlator_t *this, inode_t *inode); |
|
+ |
|
+ctr_xlator_ctx_t * |
|
+init_ctr_xlator_ctx(xlator_t *this, inode_t *inode); |
|
+ |
|
+void |
|
+fini_ctr_xlator_ctx(xlator_t *this, inode_t *inode); |
|
+ |
|
+#endif |
|
diff --git a/xlators/features/changetimerecorder/src/ctr_mem_types.h b/xlators/features/changetimerecorder/src/ctr_mem_types.h |
|
new file mode 100644 |
|
index 0000000..7b8f531 |
|
--- /dev/null |
|
+++ b/xlators/features/changetimerecorder/src/ctr_mem_types.h |
|
@@ -0,0 +1,22 @@ |
|
+/* |
|
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com> |
|
+ This file is part of GlusterFS. |
|
+ |
|
+ This file is licensed to you under your choice of the GNU Lesser |
|
+ General Public License, version 3 or any later version (LGPLv3 or |
|
+ later), or the GNU General Public License, version 2 (GPLv2), in all |
|
+ cases as published by the Free Software Foundation. |
|
+*/ |
|
+ |
|
+#ifndef __CTR_MEM_TYPES_H__ |
|
+#define __CTR_MEM_TYPES_H__ |
|
+ |
|
+#include "gfdb_mem-types.h" |
|
+ |
|
+enum gf_ctr_mem_types_ { |
|
+ gf_ctr_mt_private_t = gfdb_mt_end + 1, |
|
+ gf_ctr_mt_xlator_ctx, |
|
+ gf_ctr_mt_hard_link_t, |
|
+ gf_ctr_mt_end |
|
+}; |
|
+#endif |
|
-- |
|
1.8.3.1 |
|
|
|
|