From 39523fd6c1b4789b12c8db81f4e08a3eb0c6a65c Mon Sep 17 00:00:00 2001 From: Sunil Kumar Acharya Date: Thu, 17 Oct 2019 13:03:56 +0530 Subject: [PATCH 335/335] tests: RHEL8 test failure fixes for RHGS - tests/bugs/shard/bug-1272986.t https://review.gluster.org/#/c/glusterfs/+/23499/ https://review.gluster.org/#/c/glusterfs/+/23551/ - tests/basic/posix/shared-statfs.t https://review.gluster.org/c/glusterfs/+/23550 - tests/basic/fops-sanity.t https://review.gluster.org/c/glusterfs/+/22210/ - tests/bugs/transport/bug-873367.t - tests/features/ssl-authz.t - tests/bugs/snapshot/bug-1399598-uss-with-ssl.t https://review.gluster.org/#/c/glusterfs/+/23587/ - remove gnfs related tests - tests/bugs/shard/unlinks-and-renames.t https://review.gluster.org/#/c/glusterfs/+/23585/ - tests/bugs/rpc/bug-954057.t - tests/bugs/glusterfs-server/bug-887145.t https://review.gluster.org/#/c/glusterfs/+/23710/ - tests/features/ssl-ciphers.t https://review.gluster.org/#/c/glusterfs/+/23703/ - tests/bugs/fuse/bug-985074.t https://review.gluster.org/#/c/glusterfs/+/23734/ BUG: 1762180 Change-Id: I97b344a632b49ca9ca332a5a463756b160aee5bd Signed-off-by: Sunil Kumar Acharya Reviewed-on: https://code.engineering.redhat.com/gerrit/185716 Tested-by: RHGS Build Bot --- tests/basic/fops-sanity.c | 1862 ++-- tests/basic/posix/shared-statfs.t | 11 +- tests/bugs/cli/bug-1320388.t | 2 +- tests/bugs/fuse/bug-985074.t | 4 +- tests/bugs/glusterd/quorum-value-check.t | 35 - tests/bugs/glusterfs-server/bug-887145.t | 14 +- tests/bugs/nfs/bug-1053579.t | 114 - tests/bugs/nfs/bug-1116503.t | 47 - tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t | 24 - tests/bugs/nfs/bug-1157223-symlink-mounting.t | 126 - tests/bugs/nfs/bug-1161092-nfs-acls.t | 39 - tests/bugs/nfs/bug-1166862.t | 69 - tests/bugs/nfs/bug-1210338.c | 31 - tests/bugs/nfs/bug-1210338.t | 30 - tests/bugs/nfs/bug-1302948.t | 13 - tests/bugs/nfs/bug-847622.t | 39 - tests/bugs/nfs/bug-877885.t | 39 - tests/bugs/nfs/bug-904065.t | 100 - 
tests/bugs/nfs/bug-915280.t | 54 - tests/bugs/nfs/bug-970070.t | 13 - tests/bugs/nfs/bug-974972.t | 41 - tests/bugs/nfs/showmount-many-clients.t | 41 - tests/bugs/nfs/socket-as-fifo.py | 33 - tests/bugs/nfs/socket-as-fifo.t | 25 - tests/bugs/nfs/subdir-trailing-slash.t | 32 - tests/bugs/nfs/zero-atime.t | 33 - tests/bugs/rpc/bug-954057.t | 10 +- tests/bugs/shard/bug-1272986.t | 6 +- tests/bugs/transport/bug-873367.t | 2 +- tests/features/ssl-authz.t | 2 +- tests/features/ssl-ciphers.t | 61 +- tests/ssl.rc | 2 +- xlators/features/shard/src/shard.c | 11754 ++++++++++---------- 33 files changed, 6638 insertions(+), 8070 deletions(-) delete mode 100755 tests/bugs/glusterd/quorum-value-check.t delete mode 100755 tests/bugs/nfs/bug-1053579.t delete mode 100644 tests/bugs/nfs/bug-1116503.t delete mode 100644 tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t delete mode 100644 tests/bugs/nfs/bug-1157223-symlink-mounting.t delete mode 100644 tests/bugs/nfs/bug-1161092-nfs-acls.t delete mode 100755 tests/bugs/nfs/bug-1166862.t delete mode 100644 tests/bugs/nfs/bug-1210338.c delete mode 100644 tests/bugs/nfs/bug-1210338.t delete mode 100755 tests/bugs/nfs/bug-1302948.t delete mode 100755 tests/bugs/nfs/bug-847622.t delete mode 100755 tests/bugs/nfs/bug-877885.t delete mode 100755 tests/bugs/nfs/bug-904065.t delete mode 100755 tests/bugs/nfs/bug-915280.t delete mode 100755 tests/bugs/nfs/bug-970070.t delete mode 100755 tests/bugs/nfs/bug-974972.t delete mode 100644 tests/bugs/nfs/showmount-many-clients.t delete mode 100755 tests/bugs/nfs/socket-as-fifo.py delete mode 100644 tests/bugs/nfs/socket-as-fifo.t delete mode 100644 tests/bugs/nfs/subdir-trailing-slash.t delete mode 100755 tests/bugs/nfs/zero-atime.t diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c index aff72d8..171d003 100644 --- a/tests/basic/fops-sanity.c +++ b/tests/basic/fops-sanity.c @@ -17,15 +17,16 @@ /* Filesystem basic sanity check, tests all (almost) fops. 
*/ -#include +#include +#include #include -#include -#include +#include +#include #include +#include +#include #include -#include -#include -#include +#include #ifndef linux #include @@ -34,904 +35,880 @@ #endif /* for fd based fops after unlink */ -int -fd_based_fops_1(char *filename); +int fd_based_fops_1(char *filename); /* for fd based fops before unlink */ -int -fd_based_fops_2(char *filename); +int fd_based_fops_2(char *filename); /* fops based on fd after dup */ -int -dup_fd_based_fops(char *filename); +int dup_fd_based_fops(char *filename); /* for fops based on path */ -int -path_based_fops(char *filename); +int path_based_fops(char *filename); /* for fops which operate on directory */ -int -dir_based_fops(char *filename); +int dir_based_fops(char *filename); /* for fops which operate in link files (symlinks) */ -int -link_based_fops(char *filename); +int link_based_fops(char *filename); /* to test open syscall with open modes available. */ -int -test_open_modes(char *filename); +int test_open_modes(char *filename); /* generic function which does open write and read. 
*/ -int -generic_open_read_write(char *filename, int flag, mode_t mode); +int generic_open_read_write(char *filename, int flag, mode_t mode); #define OPEN_MODE 0666 -int -main(int argc, char *argv[]) -{ - int ret = -1; - int result = 0; - char filename[255] = { - 0, - }; - - if (argc > 1) - strcpy(filename, argv[1]); - else - strcpy(filename, "temp-xattr-test-file"); - - ret = fd_based_fops_1(strcat(filename, "_1")); - if (ret < 0) { - fprintf(stderr, "fd based file operation 1 failed\n"); - result |= ret; - } else { - fprintf(stdout, "fd based file operation 1 passed\n"); - } - - ret = fd_based_fops_2(strcat(filename, "_2")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "fd based file operation 2 failed\n"); - } else { - fprintf(stdout, "fd based file operation 2 passed\n"); - } - - ret = dup_fd_based_fops(strcat(filename, "_3")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "dup fd based file operation failed\n"); - } else { - fprintf(stdout, "dup fd based file operation passed\n"); - } - - ret = path_based_fops(strcat(filename, "_4")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "path based file operation failed\n"); - } else { - fprintf(stdout, "path based file operation passed\n"); - } - - ret = dir_based_fops(strcat(filename, "_5")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "directory based file operation failed\n"); - } else { - fprintf(stdout, "directory based file operation passed\n"); - } - - ret = link_based_fops(strcat(filename, "_5")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "link based file operation failed\n"); - } else { - fprintf(stdout, "link based file operation passed\n"); - } - - ret = test_open_modes(strcat(filename, "_5")); - if (ret < 0) { - result |= ret; - fprintf(stderr, "testing modes of `open' call failed\n"); - } else { - fprintf(stdout, "testing modes of `open' call passed\n"); - } - return result; +int main(int argc, char *argv[]) { + int ret = -1; + int result = 0; + char filename[255] = 
{ + 0, + }; + + if (argc > 1) + strcpy(filename, argv[1]); + else + strcpy(filename, "temp-xattr-test-file"); + + ret = fd_based_fops_1(strcat(filename, "_1")); + if (ret < 0) { + fprintf(stderr, "fd based file operation 1 failed\n"); + result |= ret; + } else { + fprintf(stdout, "fd based file operation 1 passed\n"); + } + + ret = fd_based_fops_2(strcat(filename, "_2")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "fd based file operation 2 failed\n"); + } else { + fprintf(stdout, "fd based file operation 2 passed\n"); + } + + ret = dup_fd_based_fops(strcat(filename, "_3")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "dup fd based file operation failed\n"); + } else { + fprintf(stdout, "dup fd based file operation passed\n"); + } + + ret = path_based_fops(strcat(filename, "_4")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "path based file operation failed\n"); + } else { + fprintf(stdout, "path based file operation passed\n"); + } + + ret = dir_based_fops(strcat(filename, "_5")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "directory based file operation failed\n"); + } else { + fprintf(stdout, "directory based file operation passed\n"); + } + + ret = link_based_fops(strcat(filename, "_5")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "link based file operation failed\n"); + } else { + fprintf(stdout, "link based file operation passed\n"); + } + + ret = test_open_modes(strcat(filename, "_5")); + if (ret < 0) { + result |= ret; + fprintf(stderr, "testing modes of `open' call failed\n"); + } else { + fprintf(stdout, "testing modes of `open' call passed\n"); + } + return result; } /* Execute all possible fops on a fd which is unlinked */ -int -fd_based_fops_1(char *filename) -{ - int fd = 0; - int ret = -1; - int result = 0; - struct stat stbuf = { - 0, - }; - char wstr[50] = { - 0, - }; - char rstr[50] = { - 0, - }; - - fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); - if (fd < 0) { - fprintf(stderr, "open failed : %s\n", 
strerror(errno)); - return ret; - } - - ret = unlink(filename); - if (ret < 0) { - fprintf(stderr, "unlink failed : %s\n", strerror(errno)); - result |= ret; - } - - strcpy(wstr, "This is my string\n"); - ret = write(fd, wstr, strlen(wstr)); - if (ret <= 0) { - fprintf(stderr, "write failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) { - fprintf(stderr, "lseek failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = read(fd, rstr, strlen(wstr)); - if (ret <= 0) { - fprintf(stderr, "read failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = memcmp(rstr, wstr, strlen(wstr)); - if (ret != 0) { - fprintf(stderr, "read returning junk\n"); - result |= ret; - } - - ret = ftruncate(fd, 0); - if (ret < 0) { - fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fstat(fd, &stbuf); - if (ret < 0) { - fprintf(stderr, "fstat failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fsync(fd); - if (ret < 0) { - fprintf(stderr, "fsync failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fdatasync(fd); - if (ret < 0) { - fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); - result |= ret; - } - - /* - * These metadata operations fail at the moment because kernel doesn't - * pass the client fd in the operation. - * The following bug tracks this change. 
- * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 - * ret = fchmod (fd, 0640); - * if (ret < 0) { - * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); - * result |= ret; - * } - - * ret = fchown (fd, 10001, 10001); - * if (ret < 0) { - * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); - * result |= ret; - * } - - * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); - * if (ret < 0) { - * fprintf (stderr, "fsetxattr failed : %s\n", strerror - (errno)); - * result |= ret; - * } - - * ret = flistxattr (fd, NULL, 0); - * if (ret <= 0) { - * fprintf (stderr, "flistxattr failed : %s\n", strerror - (errno)); - * result |= ret; - * } - - * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); - * if (ret <= 0) { - * fprintf (stderr, "fgetxattr failed : %s\n", strerror - (errno)); - * result |= ret; - * } - - * ret = fremovexattr (fd, "trusted.xattr-test"); - * if (ret < 0) { - * fprintf (stderr, "fremovexattr failed : %s\n", strerror - (errno)); - * result |= ret; - * } - */ - - if (fd) - close(fd); - return result; +int fd_based_fops_1(char *filename) { + int fd = 0; + int ret = -1; + int result = 0; + struct stat stbuf = { + 0, + }; + char wstr[50] = { + 0, + }; + char rstr[50] = { + 0, + }; + + fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); + if (fd < 0) { + fprintf(stderr, "open failed : %s\n", strerror(errno)); + return ret; + } + + ret = unlink(filename); + if (ret < 0) { + fprintf(stderr, "unlink failed : %s\n", strerror(errno)); + result |= ret; + } + + strcpy(wstr, "This is my string\n"); + ret = write(fd, wstr, strlen(wstr)); + if (ret <= 0) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "lseek failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = read(fd, rstr, strlen(wstr)); + if (ret <= 0) { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = memcmp(rstr, wstr, 
strlen(wstr)); + if (ret != 0) { + fprintf(stderr, "read returning junk\n"); + result |= ret; + } + + ret = ftruncate(fd, 0); + if (ret < 0) { + fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fstat(fd, &stbuf); + if (ret < 0) { + fprintf(stderr, "fstat failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fsync(fd); + if (ret < 0) { + fprintf(stderr, "fsync failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fdatasync(fd); + if (ret < 0) { + fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); + result |= ret; + } + + /* + * These metadata operations fail at the moment because kernel doesn't + * pass the client fd in the operation. + * The following bug tracks this change. + * https://bugzilla.redhat.com/show_bug.cgi?id=1084422 + * ret = fchmod (fd, 0640); + * if (ret < 0) { + * fprintf (stderr, "fchmod failed : %s\n", strerror (errno)); + * result |= ret; + * } + + * ret = fchown (fd, 10001, 10001); + * if (ret < 0) { + * fprintf (stderr, "fchown failed : %s\n", strerror (errno)); + * result |= ret; + * } + + * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0); + * if (ret < 0) { + * fprintf (stderr, "fsetxattr failed : %s\n", strerror + (errno)); + * result |= ret; + * } + + * ret = flistxattr (fd, NULL, 0); + * if (ret <= 0) { + * fprintf (stderr, "flistxattr failed : %s\n", strerror + (errno)); + * result |= ret; + * } + + * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0); + * if (ret <= 0) { + * fprintf (stderr, "fgetxattr failed : %s\n", strerror + (errno)); + * result |= ret; + * } + + * ret = fremovexattr (fd, "trusted.xattr-test"); + * if (ret < 0) { + * fprintf (stderr, "fremovexattr failed : %s\n", strerror + (errno)); + * result |= ret; + * } + */ + + if (fd) + close(fd); + return result; } -int -fd_based_fops_2(char *filename) -{ - int fd = 0; - int ret = -1; - int result = 0; - struct stat stbuf = { - 0, - }; - char wstr[50] = { - 0, - }; - char rstr[50] = { - 
0, - }; - - fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); - if (fd < 0) { - fprintf(stderr, "open failed : %s\n", strerror(errno)); - return ret; - } - - ret = ftruncate(fd, 0); - if (ret < 0) { - fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); - result |= ret; - } - - strcpy(wstr, "This is my second string\n"); - ret = write(fd, wstr, strlen(wstr)); - if (ret < 0) { - fprintf(stderr, "write failed: %s\n", strerror(errno)); - result |= ret; - } - - lseek(fd, 0, SEEK_SET); - if (ret < 0) { - fprintf(stderr, "lseek failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = read(fd, rstr, strlen(wstr)); - if (ret <= 0) { - fprintf(stderr, "read failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = memcmp(rstr, wstr, strlen(wstr)); - if (ret != 0) { - fprintf(stderr, "read returning junk\n"); - result |= ret; - } - - ret = fstat(fd, &stbuf); - if (ret < 0) { - fprintf(stderr, "fstat failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fchmod(fd, 0640); - if (ret < 0) { - fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fchown(fd, 10001, 10001); - if (ret < 0) { - fprintf(stderr, "fchown failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fsync(fd); - if (ret < 0) { - fprintf(stderr, "fsync failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); - if (ret < 0) { - fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fdatasync(fd); - if (ret < 0) { - fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = flistxattr(fd, NULL, 0); - if (ret <= 0) { - fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); - if (ret <= 0) { - fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fremovexattr(fd, "trusted.xattr-test"); - if 
(ret < 0) { - fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); - result |= ret; - } - - if (fd) - close(fd); - unlink(filename); +int fd_based_fops_2(char *filename) { + int fd = 0; + int ret = -1; + int result = 0; + struct stat stbuf = { + 0, + }; + char wstr[50] = { + 0, + }; + char rstr[50] = { + 0, + }; + + fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); + if (fd < 0) { + fprintf(stderr, "open failed : %s\n", strerror(errno)); + return ret; + } + + ret = ftruncate(fd, 0); + if (ret < 0) { + fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); + result |= ret; + } + + strcpy(wstr, "This is my second string\n"); + ret = write(fd, wstr, strlen(wstr)); + if (ret < 0) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + result |= ret; + } + + lseek(fd, 0, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "lseek failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = read(fd, rstr, strlen(wstr)); + if (ret <= 0) { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = memcmp(rstr, wstr, strlen(wstr)); + if (ret != 0) { + fprintf(stderr, "read returning junk\n"); + result |= ret; + } + + ret = fstat(fd, &stbuf); + if (ret < 0) { + fprintf(stderr, "fstat failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fchmod(fd, 0640); + if (ret < 0) { + fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fchown(fd, 10001, 10001); + if (ret < 0) { + fprintf(stderr, "fchown failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fsync(fd); + if (ret < 0) { + fprintf(stderr, "fsync failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fdatasync(fd); + if (ret < 0) { + fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = flistxattr(fd, NULL, 0); 
+ if (ret <= 0) { + fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0); + if (ret <= 0) { + fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fremovexattr(fd, "trusted.xattr-test"); + if (ret < 0) { + fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); + result |= ret; + } + + if (fd) + close(fd); + unlink(filename); - return result; + return result; } -int -path_based_fops(char *filename) -{ - int ret = -1; - int fd = 0; - int result = 0; - struct stat stbuf = { - 0, - }; - char newfilename[255] = { - 0, - }; - char *hardlink = "linkfile-hard.txt"; - char *symlnk = "linkfile-soft.txt"; - char buf[1024] = { - 0, - }; - - fd = creat(filename, 0644); - if (fd < 0) { - fprintf(stderr, "creat failed: %s\n", strerror(errno)); - return ret; - } - - ret = truncate(filename, 0); - if (ret < 0) { - fprintf(stderr, "truncate failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = stat(filename, &stbuf); - if (ret < 0) { - fprintf(stderr, "stat failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = chmod(filename, 0640); - if (ret < 0) { - fprintf(stderr, "chmod failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = chown(filename, 10001, 10001); - if (ret < 0) { - fprintf(stderr, "chown failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); - if (ret < 0) { - fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = listxattr(filename, NULL, 0); - if (ret <= 0) { - ret = -1; - fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = getxattr(filename, "trusted.xattr-test", NULL, 0); - if (ret <= 0) { - fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = removexattr(filename, "trusted.xattr-test"); - if (ret < 0) { - fprintf(stderr, 
"removexattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = access(filename, R_OK | W_OK); - if (ret < 0) { - fprintf(stderr, "access failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = link(filename, hardlink); - if (ret < 0) { - fprintf(stderr, "link failed: %s\n", strerror(errno)); - result |= ret; - } - unlink(hardlink); - - ret = symlink(filename, symlnk); - if (ret < 0) { - fprintf(stderr, "symlink failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = readlink(symlnk, buf, sizeof(buf)); - if (ret < 0) { - fprintf(stderr, "readlink failed: %s\n", strerror(errno)); - result |= ret; - } - unlink(symlnk); - - /* Create a character special file */ - ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); - if (ret < 0) { - fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); - result |= ret; - } - unlink("cspecial"); - - ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); - if (ret < 0) { - fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); - result |= ret; - } - unlink("bspecial"); +int path_based_fops(char *filename) { + int ret = -1; + int fd = 0; + int result = 0; + struct stat stbuf = { + 0, + }; + char newfilename[255] = { + 0, + }; + char *hardlink = "linkfile-hard.txt"; + char *symlnk = "linkfile-soft.txt"; + char buf[1024] = { + 0, + }; + + fd = creat(filename, 0644); + if (fd < 0) { + fprintf(stderr, "creat failed: %s\n", strerror(errno)); + return ret; + } + + ret = truncate(filename, 0); + if (ret < 0) { + fprintf(stderr, "truncate failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = stat(filename, &stbuf); + if (ret < 0) { + fprintf(stderr, "stat failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = chmod(filename, 0640); + if (ret < 0) { + fprintf(stderr, "chmod failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = chown(filename, 10001, 10001); + if (ret < 0) { + fprintf(stderr, "chown failed: %s\n", strerror(errno)); + 
result |= ret; + } + + ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0); + if (ret < 0) { + fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = listxattr(filename, NULL, 0); + if (ret <= 0) { + ret = -1; + fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = getxattr(filename, "trusted.xattr-test", NULL, 0); + if (ret <= 0) { + fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = removexattr(filename, "trusted.xattr-test"); + if (ret < 0) { + fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = access(filename, R_OK | W_OK); + if (ret < 0) { + fprintf(stderr, "access failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = link(filename, hardlink); + if (ret < 0) { + fprintf(stderr, "link failed: %s\n", strerror(errno)); + result |= ret; + } + unlink(hardlink); + + ret = symlink(filename, symlnk); + if (ret < 0) { + fprintf(stderr, "symlink failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = readlink(symlnk, buf, sizeof(buf)); + if (ret < 0) { + fprintf(stderr, "readlink failed: %s\n", strerror(errno)); + result |= ret; + } + unlink(symlnk); + + /* Create a character special file */ + ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3)); + if (ret < 0) { + fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno)); + result |= ret; + } + unlink("cspecial"); + + ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5)); + if (ret < 0) { + fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno)); + result |= ret; + } + unlink("bspecial"); #ifdef linux - ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); + ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0); #else - ret = mkfifo("fifo", 0); + ret = mkfifo("fifo", 0); #endif - if (ret < 0) { - fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); - result |= ret; - } - unlink("fifo"); + if 
(ret < 0) { + fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); + result |= ret; + } + unlink("fifo"); #ifdef linux - ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); - if (ret < 0) { - fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); - result |= ret; - } + ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0); + if (ret < 0) { + fprintf(stderr, "sock mknod failed: %s\n", strerror(errno)); + result |= ret; + } #else - { - int s; - const char *pathname = "sock"; - struct sockaddr_un addr; - - s = socket(PF_LOCAL, SOCK_STREAM, 0); - memset(&addr, 0, sizeof(addr)); - strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); - ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); - if (ret < 0) { - fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); - result |= ret; - } - close(s); - } -#endif - unlink("sock"); + { + int s; + const char *pathname = "sock"; + struct sockaddr_un addr; - strcpy(newfilename, filename); - strcat(newfilename, "_new"); - ret = rename(filename, newfilename); + s = socket(PF_LOCAL, SOCK_STREAM, 0); + memset(&addr, 0, sizeof(addr)); + strncpy(addr.sun_path, pathname, sizeof(addr.sun_path)); + ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr)); if (ret < 0) { - fprintf(stderr, "rename failed: %s\n", strerror(errno)); - result |= ret; - } - unlink(newfilename); - - if (fd) - close(fd); - - unlink(filename); - return result; -} - -int -dup_fd_based_fops(char *filename) -{ - int fd = 0; - int result = 0; - int newfd = 0; - int ret = -1; - struct stat stbuf = { - 0, - }; - char wstr[50] = { - 0, - }; - char rstr[50] = { - 0, - }; - - fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); - if (fd < 0) { - fprintf(stderr, "open failed : %s\n", strerror(errno)); - return ret; - } - - newfd = dup(fd); - if (newfd < 0) { - fprintf(stderr, "dup failed: %s\n", strerror(errno)); - result |= ret; + fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno)); + result |= ret; } - + close(s); + } +#endif + 
unlink("sock"); + + strcpy(newfilename, filename); + strcat(newfilename, "_new"); + ret = rename(filename, newfilename); + if (ret < 0) { + fprintf(stderr, "rename failed: %s\n", strerror(errno)); + result |= ret; + } + unlink(newfilename); + + if (fd) close(fd); - strcpy(wstr, "This is my string\n"); - ret = write(newfd, wstr, strlen(wstr)); - if (ret <= 0) { - fprintf(stderr, "write failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lseek(newfd, 0, SEEK_SET); - if (ret < 0) { - fprintf(stderr, "lseek failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = read(newfd, rstr, strlen(wstr)); - if (ret <= 0) { - fprintf(stderr, "read failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = memcmp(rstr, wstr, strlen(wstr)); - if (ret != 0) { - fprintf(stderr, "read returning junk\n"); - result |= ret; - } - - ret = ftruncate(newfd, 0); - if (ret < 0) { - fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fstat(newfd, &stbuf); - if (ret < 0) { - fprintf(stderr, "fstat failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fchmod(newfd, 0640); - if (ret < 0) { - fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fchown(newfd, 10001, 10001); - if (ret < 0) { - fprintf(stderr, "fchown failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fsync(newfd); - if (ret < 0) { - fprintf(stderr, "fsync failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); - if (ret < 0) { - fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fdatasync(newfd); - if (ret < 0) { - fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = flistxattr(newfd, NULL, 0); - if (ret <= 0) { - fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); - if (ret <= 
0) { - fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); - result |= ret; - } - - ret = fremovexattr(newfd, "trusted.xattr-test"); - if (ret < 0) { - fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); - result |= ret; - } - - if (newfd) - close(newfd); - ret = unlink(filename); - if (ret < 0) { - fprintf(stderr, "unlink failed : %s\n", strerror(errno)); - result |= ret; - } - return result; + unlink(filename); + return result; } -int -dir_based_fops(char *dirname) -{ - int ret = -1; - int result = 0; - DIR *dp = NULL; - char buff[255] = { - 0, - }; - struct dirent *dbuff = { - 0, - }; - struct stat stbuff = { - 0, - }; - char newdname[255] = { - 0, - }; - char *cwd = NULL; - - ret = mkdir(dirname, 0755); - if (ret < 0) { - fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); - return ret; - } - - dp = opendir(dirname); - if (dp == NULL) { - fprintf(stderr, "opendir failed: %s\n", strerror(errno)); - result |= ret; - } - - dbuff = readdir(dp); - if (NULL == dbuff) { - fprintf(stderr, "readdir failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = closedir(dp); - if (ret < 0) { - fprintf(stderr, "closedir failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = stat(dirname, &stbuff); - if (ret < 0) { - fprintf(stderr, "stat failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = chmod(dirname, 0744); - if (ret < 0) { - fprintf(stderr, "chmod failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = chown(dirname, 10001, 10001); - if (ret < 0) { - fprintf(stderr, "chmod failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); - if (ret < 0) { - fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = listxattr(dirname, NULL, 0); - if (ret <= 0) { - ret = -1; - fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); - if (ret <= 0) 
{ - ret = -1; - fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = removexattr(dirname, "trusted.xattr-test"); - if (ret < 0) { - fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); - result |= ret; - } - - strcpy(newdname, dirname); - strcat(newdname, "/../"); - ret = chdir(newdname); - if (ret < 0) { - fprintf(stderr, "chdir failed: %s\n", strerror(errno)); - result |= ret; - } - - cwd = getcwd(buff, 255); - if (NULL == cwd) { - fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); - result |= ret; - } - - strcpy(newdname, dirname); - strcat(newdname, "new"); - ret = rename(dirname, newdname); - if (ret < 0) { - fprintf(stderr, "rename failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = rmdir(newdname); - if (ret < 0) { - fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); - result |= ret; - } - - rmdir(dirname); - return result; +int dup_fd_based_fops(char *filename) { + int fd = 0; + int result = 0; + int newfd = 0; + int ret = -1; + struct stat stbuf = { + 0, + }; + char wstr[50] = { + 0, + }; + char rstr[50] = { + 0, + }; + + fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE); + if (fd < 0) { + fprintf(stderr, "open failed : %s\n", strerror(errno)); + return ret; + } + + newfd = dup(fd); + if (newfd < 0) { + fprintf(stderr, "dup failed: %s\n", strerror(errno)); + result |= ret; + } + + close(fd); + + strcpy(wstr, "This is my string\n"); + ret = write(newfd, wstr, strlen(wstr)); + if (ret <= 0) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lseek(newfd, 0, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "lseek failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = read(newfd, rstr, strlen(wstr)); + if (ret <= 0) { + fprintf(stderr, "read failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = memcmp(rstr, wstr, strlen(wstr)); + if (ret != 0) { + fprintf(stderr, "read returning junk\n"); + result |= ret; + } + + ret = ftruncate(newfd, 0); 
+ if (ret < 0) { + fprintf(stderr, "ftruncate failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fstat(newfd, &stbuf); + if (ret < 0) { + fprintf(stderr, "fstat failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fchmod(newfd, 0640); + if (ret < 0) { + fprintf(stderr, "fchmod failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fchown(newfd, 10001, 10001); + if (ret < 0) { + fprintf(stderr, "fchown failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fsync(newfd); + if (ret < 0) { + fprintf(stderr, "fsync failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0); + if (ret < 0) { + fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fdatasync(newfd); + if (ret < 0) { + fprintf(stderr, "fdatasync failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = flistxattr(newfd, NULL, 0); + if (ret <= 0) { + fprintf(stderr, "flistxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0); + if (ret <= 0) { + fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno)); + result |= ret; + } + + ret = fremovexattr(newfd, "trusted.xattr-test"); + if (ret < 0) { + fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno)); + result |= ret; + } + + if (newfd) + close(newfd); + ret = unlink(filename); + if (ret < 0) { + fprintf(stderr, "unlink failed : %s\n", strerror(errno)); + result |= ret; + } + return result; } -int -link_based_fops(char *filename) -{ - int ret = -1; - int result = 0; - int fd = 0; - char newname[255] = { - 0, - }; - char linkname[255] = { - 0, - }; - struct stat lstbuf = { - 0, - }; - - fd = creat(filename, 0644); - if (fd < 0) { - fd = 0; - fprintf(stderr, "creat failed: %s\n", strerror(errno)); - return ret; - } - - strcpy(newname, filename); - strcat(newname, "_hlink"); - ret = link(filename, newname); - if (ret < 0) { - 
fprintf(stderr, "link failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = unlink(filename); - if (ret < 0) { - fprintf(stderr, "unlink failed: %s\n", strerror(errno)); - result |= ret; - } - - strcpy(linkname, filename); - strcat(linkname, "_slink"); - ret = symlink(newname, linkname); - if (ret < 0) { - fprintf(stderr, "symlink failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lstat(linkname, &lstbuf); - if (ret < 0) { - fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lchown(linkname, 10001, 10001); - if (ret < 0) { - fprintf(stderr, "lchown failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); - if (ret < 0) { - fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = llistxattr(linkname, NULL, 0); - if (ret < 0) { - ret = -1; - fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); - if (ret < 0) { - ret = -1; - fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); - result |= ret; - } - - ret = lremovexattr(linkname, "trusted.lxattr-test"); - if (ret < 0) { - fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); - result |= ret; - } - - if (fd) - close(fd); - unlink(linkname); - unlink(newname); - return result; +int dir_based_fops(char *dirname) { + int ret = -1; + int result = 0; + DIR *dp = NULL; + char buff[255] = { + 0, + }; + struct dirent *dbuff = { + 0, + }; + struct stat stbuff = { + 0, + }; + char newdname[255] = { + 0, + }; + char *cwd = NULL; + + ret = mkdir(dirname, 0755); + if (ret < 0) { + fprintf(stderr, "mkdir failed: %s\n", strerror(errno)); + return ret; + } + + dp = opendir(dirname); + if (dp == NULL) { + fprintf(stderr, "opendir failed: %s\n", strerror(errno)); + result |= ret; + } + + dbuff = readdir(dp); + if (NULL == dbuff) { + fprintf(stderr, "readdir failed: %s\n", 
strerror(errno)); + result |= ret; + } + + ret = closedir(dp); + if (ret < 0) { + fprintf(stderr, "closedir failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = stat(dirname, &stbuff); + if (ret < 0) { + fprintf(stderr, "stat failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = chmod(dirname, 0744); + if (ret < 0) { + fprintf(stderr, "chmod failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = chown(dirname, 10001, 10001); + if (ret < 0) { + fprintf(stderr, "chmod failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0); + if (ret < 0) { + fprintf(stderr, "setxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = listxattr(dirname, NULL, 0); + if (ret <= 0) { + ret = -1; + fprintf(stderr, "listxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = getxattr(dirname, "trusted.xattr-test", NULL, 0); + if (ret <= 0) { + ret = -1; + fprintf(stderr, "getxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = removexattr(dirname, "trusted.xattr-test"); + if (ret < 0) { + fprintf(stderr, "removexattr failed: %s\n", strerror(errno)); + result |= ret; + } + + strcpy(newdname, dirname); + strcat(newdname, "/../"); + ret = chdir(newdname); + if (ret < 0) { + fprintf(stderr, "chdir failed: %s\n", strerror(errno)); + result |= ret; + } + + cwd = getcwd(buff, 255); + if (NULL == cwd) { + fprintf(stderr, "getcwd failed: %s\n", strerror(errno)); + result |= ret; + } + + strcpy(newdname, dirname); + strcat(newdname, "new"); + ret = rename(dirname, newdname); + if (ret < 0) { + fprintf(stderr, "rename failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = rmdir(newdname); + if (ret < 0) { + fprintf(stderr, "rmdir failed: %s\n", strerror(errno)); + result |= ret; + } + + rmdir(dirname); + return result; } -int -test_open_modes(char *filename) -{ - int ret = -1; - int result = 0; - - ret = generic_open_read_write(filename, O_CREAT | 
O_WRONLY, OPEN_MODE); - if (ret != 0) { - fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); - result |= ret; - } - - ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); - if (ret != 0) { - fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); - result |= ret; - } - - ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); - if (ret != 0) { - fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); - result |= ret; - } - - ret = creat(filename, 0644); - close(ret); - ret = generic_open_read_write(filename, O_WRONLY, 0); - if (ret != 0) { - fprintf(stderr, "flag O_WRONLY failed\n"); - result |= ret; - } - - ret = creat(filename, 0644); - close(ret); - ret = generic_open_read_write(filename, O_RDWR, 0); - if (0 != ret) { - fprintf(stderr, "flag O_RDWR failed\n"); - result |= ret; - } - - ret = creat(filename, 0644); - close(ret); - ret = generic_open_read_write(filename, O_RDONLY, 0); - if (0 != ret) { - fprintf(stderr, "flag O_RDONLY failed\n"); - result |= ret; - } +int link_based_fops(char *filename) { + int ret = -1; + int result = 0; + int fd = 0; + char newname[255] = { + 0, + }; + char linkname[255] = { + 0, + }; + struct stat lstbuf = { + 0, + }; + + fd = creat(filename, 0644); + if (fd < 0) { + fd = 0; + fprintf(stderr, "creat failed: %s\n", strerror(errno)); + return ret; + } + + strcpy(newname, filename); + strcat(newname, "_hlink"); + ret = link(filename, newname); + if (ret < 0) { + fprintf(stderr, "link failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = unlink(filename); + if (ret < 0) { + fprintf(stderr, "unlink failed: %s\n", strerror(errno)); + result |= ret; + } + + strcpy(linkname, filename); + strcat(linkname, "_slink"); + ret = symlink(newname, linkname); + if (ret < 0) { + fprintf(stderr, "symlink failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lstat(linkname, &lstbuf); + if (ret < 0) { + fprintf(stderr, "lstbuf failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lchown(linkname, 
10001, 10001); + if (ret < 0) { + fprintf(stderr, "lchown failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0); + if (ret < 0) { + fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = llistxattr(linkname, NULL, 0); + if (ret < 0) { + ret = -1; + fprintf(stderr, "llistxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0); + if (ret < 0) { + ret = -1; + fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno)); + result |= ret; + } + + ret = lremovexattr(linkname, "trusted.lxattr-test"); + if (ret < 0) { + fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno)); + result |= ret; + } + + if (fd) + close(fd); + unlink(linkname); + unlink(newname); + return result; +} - ret = creat(filename, 0644); - close(ret); - ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); - if (0 != ret) { - fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); - result |= ret; - } +int test_open_modes(char *filename) { + int ret = -1; + int result = 0; + + ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE); + if (ret != 0) { + fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n"); + result |= ret; + } + + ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE); + if (ret != 0) { + fprintf(stderr, "flag O_CREAT|O_RDWR failed\n"); + result |= ret; + } + + ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE); + if (ret != 0) { + fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n"); + result |= ret; + } + + ret = creat(filename, 0644); + close(ret); + ret = generic_open_read_write(filename, O_WRONLY, 0); + if (ret != 0) { + fprintf(stderr, "flag O_WRONLY failed\n"); + result |= ret; + } + + ret = creat(filename, 0644); + close(ret); + ret = generic_open_read_write(filename, O_RDWR, 0); + if (0 != ret) { + fprintf(stderr, "flag O_RDWR failed\n"); + result |= ret; 
+ } + + ret = creat(filename, 0644); + close(ret); + ret = generic_open_read_write(filename, O_RDONLY, 0); + if (0 != ret) { + fprintf(stderr, "flag O_RDONLY failed\n"); + result |= ret; + } + + ret = creat(filename, 0644); + close(ret); + ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0); + if (0 != ret) { + fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n"); + result |= ret; + } #if 0 /* undefined behaviour, unable to reliably test */ ret = creat (filename, 0644); @@ -943,90 +920,87 @@ test_open_modes(char *filename) } #endif - ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, - OPEN_MODE); - if (0 != ret) { - fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); - result |= ret; - } - - ret = creat(filename, 0644); - close(ret); - ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); - if (0 != ret) { - fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); - result |= ret; - } - - return result; + ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC, OPEN_MODE); + if (0 != ret) { + fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n"); + result |= ret; + } + + ret = creat(filename, 0644); + close(ret); + ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE); + if (0 != ret) { + fprintf(stderr, "flag O_CREAT|O_EXCL failed\n"); + result |= ret; + } + + return result; } -int -generic_open_read_write(char *filename, int flag, mode_t mode) -{ - int fd = 0; - int ret = -1; - char wstring[50] = { - 0, - }; - char rstring[50] = { - 0, - }; - - fd = open(filename, flag, mode); - if (fd < 0) { - if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { - unlink(filename); - return 0; - } else { - fprintf(stderr, "open failed: %s\n", strerror(errno)); - return -1; - } - } - - strcpy(wstring, "My string to write\n"); - ret = write(fd, wstring, strlen(wstring)); - if (ret <= 0) { - if (errno != EBADF) { - fprintf(stderr, "write failed: %s\n", strerror(errno)); - close(fd); - unlink(filename); - return ret; - } 
- } - - ret = lseek(fd, 0, SEEK_SET); - if (ret < 0) { - close(fd); - unlink(filename); - return ret; +int generic_open_read_write(char *filename, int flag, mode_t mode) { + int fd = 0; + int ret = -1; + char wstring[50] = { + 0, + }; + char rstring[50] = { + 0, + }; + + fd = open(filename, flag, mode); + if (fd < 0) { + if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) { + unlink(filename); + return 0; + } else { + fprintf(stderr, "open failed: %s\n", strerror(errno)); + return -1; } + } - ret = read(fd, rstring, strlen(wstring)); - if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && - flag != (O_TRUNC | O_WRONLY)) { - close(fd); - unlink(filename); - return ret; + strcpy(wstring, "My string to write\n"); + ret = write(fd, wstring, strlen(wstring)); + if (ret <= 0) { + if (errno != EBADF) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + close(fd); + unlink(filename); + return ret; } + } - /* Compare the rstring with wstring. But we do not want to return - * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or - * O_TRUNC|O_RDONLY. Because in that case we are not writing - * anything to the file.*/ - - ret = memcmp(wstring, rstring, strlen(wstring)); - if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && - flag != (O_CREAT | O_WRONLY) && - !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || - flag == (O_TRUNC | O_RDONLY))) { - fprintf(stderr, "read is returning junk\n"); - close(fd); - unlink(filename); - return ret; - } + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + close(fd); + unlink(filename); + return ret; + } + ret = read(fd, rstring, strlen(wstring)); + if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY && + flag != (O_TRUNC | O_WRONLY)) { + close(fd); + unlink(filename); + return ret; + } + + /* Compare the rstring with wstring. But we do not want to return + * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or + * O_TRUNC|O_RDONLY. 
Because in that case we are not writing + * anything to the file.*/ + + ret = memcmp(wstring, rstring, strlen(wstring)); + if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY && + flag != (O_CREAT | O_WRONLY) && + !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY || + flag == (O_TRUNC | O_RDONLY))) { + fprintf(stderr, "read is returning junk\n"); close(fd); unlink(filename); - return 0; + return ret; + } + + close(fd); + unlink(filename); + return 0; } diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t index 3343956..0e4a1bb 100644 --- a/tests/basic/posix/shared-statfs.t +++ b/tests/basic/posix/shared-statfs.t @@ -20,15 +20,18 @@ TEST mkdir -p $B0/${V0}1 $B0/${V0}2 TEST MOUNT_LOOP $LO1 $B0/${V0}1 TEST MOUNT_LOOP $LO2 $B0/${V0}2 +total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}') +#Account for rounding error +brick_blocks_two_percent_less=$((total_brick_blocks*98/100)) # Create a subdir in mountpoint and use that for volume. TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; TEST $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 -total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') # Keeping the size less than 200M mainly because XFS will use # some storage in brick to keep its own metadata. 
-TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] +TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] TEST force_umount $M0 @@ -41,8 +44,8 @@ TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/ TEST $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 -total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') -TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] +total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}') +TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ] TEST force_umount $M0 TEST $CLI volume stop $V0 diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t index 8e5d77b..e719fc5 100755 --- a/tests/bugs/cli/bug-1320388.t +++ b/tests/bugs/cli/bug-1320388.t @@ -21,7 +21,7 @@ cleanup; rm -f $SSL_BASE/glusterfs.* touch "$GLUSTERD_WORKDIR"/secure-access -TEST openssl genrsa -out $SSL_KEY 3072 +TEST openssl genrsa -out $SSL_KEY 2048 TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT ln $SSL_CERT $SSL_CA diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t index d10fd9f..26d196e 100644 --- a/tests/bugs/fuse/bug-985074.t +++ b/tests/bugs/fuse/bug-985074.t @@ -30,7 +30,7 @@ TEST glusterd TEST $CLI volume create $V0 $H0:$B0/$V0 TEST $CLI volume start $V0 -TEST $CLI volume set $V0 md-cache-timeout 3 +TEST $CLI volume set $V0 performance.stat-prefetch off TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0 @@ -40,8 +40,6 @@ TEST ln $M0/file $M0/file.link TEST ls -ali $M0 $M1 TEST rm -f $M1/file.link TEST ls -ali $M0 $M1 -# expire the md-cache timeout -sleep 3 TEST mv $M0/file $M0/file.link TEST stat $M0/file.link TEST ! 
stat $M0/file diff --git a/tests/bugs/glusterd/quorum-value-check.t b/tests/bugs/glusterd/quorum-value-check.t deleted file mode 100755 index aaf6362..0000000 --- a/tests/bugs/glusterd/quorum-value-check.t +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc - -function check_quorum_nfs() { - local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')" - local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')" - - if [ $qnfs = $qinfo ]; then - echo "Y" - else - echo "N" - fi -} - -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} -TEST $CLI volume set $V0 nfs.disable off -TEST $CLI volume set $V0 performance.write-behind off -TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST $CLI volume set $V0 cluster.quorum-type fixed -TEST $CLI volume start $V0 - -TEST $CLI volume set $V0 cluster.quorum-count 1 -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs -TEST $CLI volume set $V0 cluster.quorum-count 2 -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs -TEST $CLI volume set $V0 cluster.quorum-count 3 -EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs - -cleanup; diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t index 82f7cca..f65b1bd 100755 --- a/tests/bugs/glusterfs-server/bug-887145.t +++ b/tests/bugs/glusterfs-server/bug-887145.t @@ -29,7 +29,15 @@ chmod 600 $M0/file; TEST mount_nfs $H0:/$V0 $N0 nolock; -chown -R nfsnobody:nfsnobody $M0/dir; +grep nfsnobody /etc/passwd > /dev/null +if [ $?
-eq 1 ]; then +usr=nobody +grp=nobody +else +usr=nfsnobody +grp=nfsnobody +fi +chown -R $usr:$grp $M0/dir; chown -R tmp_user:tmp_user $M0/other; TEST $CLI volume set $V0 server.root-squash on; @@ -38,7 +46,7 @@ EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; # create files and directories in the root of the glusterfs and nfs mount # which is owned by root and hence the right behavior is getting EACCESS -# as the fops are executed as nfsnobody. +# as the fops are executed as nfsnobody/nobody. touch $M0/foo 2>/dev/null; TEST [ $? -ne 0 ] touch $N0/foo 2>/dev/null; @@ -61,7 +69,7 @@ cat $N0/passwd 1>/dev/null; TEST [ $? -eq 0 ] # create files and directories should succeed as the fops are being executed -# inside the directory owned by nfsnobody +# inside the directory owned by nfsnobody/nobody TEST touch $M0/dir/file; TEST touch $N0/dir/foo; TEST mkdir $M0/dir/new; diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t deleted file mode 100755 index 2f53172..0000000 --- a/tests/bugs/nfs/bug-1053579.t +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup - -# prepare the users and groups -NEW_USER=bug1053579 -NEW_UID=1053579 -NEW_GID=1053579 -LAST_GID=1053779 -NEW_GIDS=${NEW_GID} - -# OS-specific overrides -case $OSTYPE in -NetBSD|Darwin) - # only NGROUPS_MAX=16 secondary groups are supported - LAST_GID=1053593 - ;; -FreeBSD) - # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups - ;; -Linux) - # NGROUPS_MAX=65536, we can afford 200 groups - ;; -*) - ;; -esac - -# create a user that belongs to many groups -for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) -do - groupadd -o -g ${GID} ${NEW_USER}-${GID} - NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}" -done -TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER} - -# preparation done, start the tests - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 $H0:$B0/${V0}1 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume set $V0 nfs.server-aux-gids on -TEST $CLI volume start $V0 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available - -# mount the volume -TEST mount_nfs $H0:/$V0 $N0 nolock -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 - -# the actual test, this used to crash -su -m ${NEW_USER} -c "stat $N0/. > /dev/null" -TEST [ $? -eq 0 ] - -# create a file that only a user in a high-group can access -echo 'Hello World!' > $N0/README -chgrp ${LAST_GID} $N0/README -chmod 0640 $N0/README - -#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" -su -m ${NEW_USER} -c "cat $N0/README" -ret=$? - -case $OSTYPE in -Linux) # Linux NFS fails with big GID - if [ $ret -ne 0 ] ; then - res="Y" - else - res="N" - fi - ;; -*) # Other systems should cope better - if [ $ret -eq 0 ] ; then - res="Y" - else - res="N" - fi - ;; -esac -TEST [ "x$res" = "xY" ] - -# This passes only on build.gluster.org, not reproducible on other machines?! -#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" -#TEST [ $? 
-ne 0 ] - -# enable server.manage-gids and things should work -TEST $CLI volume set $V0 server.manage-gids on - -su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null" -TEST [ $? -eq 0 ] -su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null" -TEST [ $? -eq 0 ] - -# cleanup -userdel --force ${NEW_USER} -for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID}) -do - groupdel ${NEW_USER}-${GID} -done - -rm -f $N0/README -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 - -TEST $CLI volume stop $V0 -TEST $CLI volume delete $V0 - -cleanup diff --git a/tests/bugs/nfs/bug-1116503.t b/tests/bugs/nfs/bug-1116503.t deleted file mode 100644 index dd3998d..0000000 --- a/tests/bugs/nfs/bug-1116503.t +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# -# Verify that mounting NFS over UDP (MOUNT service only) works. -# - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume set $V0 nfs.mount-udp on - -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; - -TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp; -TEST mkdir -p $N0/foo/bar -TEST ls $N0/foo -TEST ls $N0/foo/bar -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -TEST $CLI volume set $V0 nfs.addr-namelookup on -TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST 
mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp; - -cleanup; diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t deleted file mode 100644 index c360db4..0000000 --- a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume set $V0 performance.open-behind off -TEST $CLI volume start $V0 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available - -TEST mount_nfs $H0:/$V0 $N0 nolock -TEST mkdir -p $N0/foo -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 -TEST mount_nfs $H0:/$V0/foo $N0 nolock -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 -cleanup diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t deleted file mode 100644 index dea609e..0000000 --- a/tests/bugs/nfs/bug-1157223-symlink-mounting.t +++ /dev/null @@ -1,126 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -## Start and create a volume -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume info; -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0; - -## Wait for volume to register with rpc.mountd -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; - -## Mount NFS -TEST mount_nfs $H0:/$V0 $N0 nolock; - -mkdir $N0/dir1; -mkdir $N0/dir2; -pushd $N0/ ; - -##link created using relative path -ln -s dir1 symlink1; - -##relative path contains ".." -ln -s ../dir1 dir2/symlink2; - -##link created using absolute path -ln -s $N0/dir1 symlink3; - -##link pointing to another symlinks -ln -s symlink1 symlink4 -ln -s symlink3 symlink5 - -##dead links -ln -s does/not/exist symlink6 - -##link which contains ".." points out of glusterfs -ln -s ../../ symlink7 - -##links pointing to unauthorized area -ln -s .glusterfs symlink8 - -popd ; - -##Umount the volume -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount and umount NFS via directory -TEST mount_nfs $H0:/$V0/dir1 $N0 nolock; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount and umount NFS via symlink1 -TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount and umount NFS via symlink2 -TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount NFS via symlink3 should fail -TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock; - -## Mount and umount NFS via symlink4 -TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount NFS via symlink5 should fail -TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock; - -## Mount NFS via symlink6 should fail -TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock; - -## Mount NFS via symlink7 should fail -TEST ! 
mount_nfs $H0:/$V0/symlink7 $N0 nolock; - -## Mount NFS via symlink8 should fail -TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock; - -##Similar check for udp mount -$CLI volume stop $V0 -TEST $CLI volume set $V0 nfs.mount-udp on -$CLI volume start $V0 - -## Wait for volume to register with rpc.mountd -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; - -## Mount and umount NFS via directory -TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount and umount NFS via symlink1 -TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount and umount NFS via symlink2 -TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount NFS via symlink3 should fail -TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp; - -## Mount and umount NFS via symlink4 -TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp; -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0 - -## Mount NFS via symlink5 should fail -TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp; - -## Mount NFS via symlink6 should fail -TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp; - -##symlink7 is not check here, because in udp mount ../../ resolves into root '/' - -## Mount NFS via symlink8 should fail -TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp; - -rm -rf $H0:$B0/ -cleanup; diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t deleted file mode 100644 index 45a22e7..0000000 --- a/tests/bugs/nfs/bug-1161092-nfs-acls.t +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume info - -TEST $CLI volume create $V0 $H0:$B0/brick1; -EXPECT 'Created' volinfo_field $V0 'Status'; -TEST $CLI volume set $V0 nfs.disable false - -TEST $CLI volume start $V0; -EXPECT 'Started' volinfo_field $V0 'Status'; - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available -TEST mount_nfs $H0:/$V0 $N0 - -TEST touch $N0/file1 -TEST chmod 700 $N0/file1 -TEST getfacl $N0/file1 - -TEST $CLI volume set $V0 root-squash on -TEST getfacl $N0/file1 - -TEST umount_nfs $H0:/$V0 $N0 -TEST mount_nfs $H0:/$V0 $N0 -TEST getfacl $N0/file1 - -## Before killing daemon to avoid deadlocks -umount_nfs $N0 - -cleanup; - diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t deleted file mode 100755 index c4f51a2..0000000 --- a/tests/bugs/nfs/bug-1166862.t +++ /dev/null @@ -1,69 +0,0 @@ -#!/bin/bash -# -# When nfs.mount-rmtab is disabled, it should not get updated. -# -# Based on: bug-904065.t -# - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -# count the lines of a file, return 0 if the file does not exist -function count_lines() -{ - if [ -n "$1" ] - then - $@ 2>/dev/null | wc -l - else - echo 0 - fi -} - - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc -. 
$(dirname $0)/../../volume.rc - -cleanup - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/brick1 -EXPECT 'Created' volinfo_field $V0 'Status' -TEST $CLI volume set $V0 nfs.disable false - -TEST $CLI volume start $V0; -EXPECT 'Started' volinfo_field $V0 'Status' - -# glusterfs/nfs needs some time to start up in the background -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available - -# disable the rmtab by settting it to the magic "/-" value -TEST $CLI volume set $V0 nfs.mount-rmtab /- - -# before mounting the rmtab should be empty -EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab - -TEST mount_nfs $H0:/$V0 $N0 nolock -EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab - -# showmount should list one client -EXPECT '1' count_lines showmount --no-headers $H0 - -# unmount -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -# after resetting the option, the rmtab should get updated again -TEST $CLI volume reset $V0 nfs.mount-rmtab - -# before mounting the rmtab should be empty -EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab - -TEST mount_nfs $H0:/$V0 $N0 nolock -EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab - -# removing a mount -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab - -cleanup diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c deleted file mode 100644 index d409924..0000000 --- a/tests/bugs/nfs/bug-1210338.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -int -main(int argc, char *argv[]) -{ - int ret = -1; - int fd = -1; - - fd = open(argv[1], O_CREAT | O_EXCL, 0644); - - if (fd == -1) { - fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1], - strerror(errno)); - goto out; - } - - ret = 0; - -out: - if (fd > 0) - close(fd); - - return ret; -} diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t deleted file mode 100644 
index b5c9245..0000000 --- a/tests/bugs/nfs/bug-1210338.t +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -NFS_SOURCE=$(dirname $0)/bug-1210338.c -NFS_EXEC=$(dirname $0)/excl_create - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock - -build_tester $NFS_SOURCE -o $NFS_EXEC -TEST [ -e $NFS_EXEC ] - -TEST $NFS_EXEC $N0/my_file - -rm -f $NFS_EXEC; - -cleanup diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t deleted file mode 100755 index a2fb0e6..0000000 --- a/tests/bugs/nfs/bug-1302948.t +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# TEST the nfs.rdirplus option -. $(dirname $0)/../../include.rc - -cleanup -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume start $V0 -TEST $CLI volume set $V0 nfs.rdirplus off -TEST $CLI volume set $V0 nfs.rdirplus on -cleanup diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t deleted file mode 100755 index 5ccee72..0000000 --- a/tests/bugs/nfs/bug-847622.t +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc -. 
$(dirname $0)/../../volume.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -case $OSTYPE in -NetBSD) - echo "Skip test on ACL which are not available on NetBSD" >&2 - SKIP_TESTS - exit 0 - ;; -*) - ;; -esac - -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 $H0:$B0/brick0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock -cd $N0 - -# simple getfacl setfacl commands -TEST touch testfile -TEST setfacl -m u:14:r testfile -TEST getfacl testfile - -cd -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -cleanup - diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t deleted file mode 100755 index dca315a..0000000 --- a/tests/bugs/nfs/bug-877885.t +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc -. $(dirname $0)/../../volume.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 - -## Mount FUSE with caching disabled -TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \ -$M0; - -TEST touch $M0/file -TEST mkdir $M0/dir - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock -cd $N0 - -rm -rf * & - -TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock; - -cd; - -kill %1; - -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 - -cleanup diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t deleted file mode 100755 index 0eba86e..0000000 --- a/tests/bugs/nfs/bug-904065.t +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash -# -# This test does not use 'showmount' from the nfs-utils package, it would -# require setting up a portmapper (either rpcbind 
or portmap, depending on the -# Linux distribution used for testing). The persistancy of the rmtab should not -# affect the current showmount outputs, so existing regression tests should be -# sufficient. -# - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -# count the lines of a file, return 0 if the file does not exist -function count_lines() -{ - if [ -e "$1" ] - then - wc -l < $1 - else - echo 0 - fi -} - - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc -. $(dirname $0)/../../volume.rc - -cleanup - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/brick1 -EXPECT 'Created' volinfo_field $V0 'Status' -TEST $CLI volume set $V0 nfs.disable false - -TEST $CLI volume start $V0; -EXPECT 'Started' volinfo_field $V0 'Status' - -# glusterfs/nfs needs some time to start up in the background -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available - -# before mounting the rmtab should be empty -EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab - -TEST mount_nfs $H0:/$V0 $N0 nolock -# the output would looks similar to: -# -# hostname-0=172.31.122.104 -# mountpoint-0=/ufo -# -EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab - -# duplicate mounts should not be recorded (client could have crashed) -TEST mount_nfs $H0:/$V0 $N1 nolock -EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab - -# removing a mount should (even if there are two) should remove the entry -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1 -EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab - -# unmounting the other mount should work flawlessly -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab - -TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0 - -# we'll create a fake rmtab here, similar to how an other storage server would do -# using an invalid IP address to prevent (unlikely) collisions on the test-machine -cat << EOF > $M0/rmtab -hostname-0=127.0.0.256 
-mountpoint-0=/ufo -EOF -EXPECT '2' count_lines $M0/rmtab - -# reconfigure merges the rmtab with the one on the volume -TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab - -# glusterfs/nfs needs some time to restart -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available - -# Apparently "is_nfs_export_available" might return even if the export is -# not, in fact, available. (eyeroll) Give it a bit of extra time. -# -# TBD: fix the broken shell function instead of working around it here -sleep 5 - -# a new mount should be added to the rmtab, not overwrite exiting ones -TEST mount_nfs $H0:/$V0 $N0 nolock -EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab - -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -EXPECT '2' count_lines $M0/rmtab - -# TODO: nfs/reconfigure() is never called and is therefor disabled. When the -# NFS-server supports reloading and does not get restarted anymore, we should -# add a test that includes the merging of entries in the old rmtab with the new -# rmtab. - -cleanup diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t deleted file mode 100755 index bd27915..0000000 --- a/tests/bugs/nfs/bug-915280.t +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -TEST glusterd -TEST pidof glusterd - -function volinfo_field() -{ - local vol=$1; - local field=$2; - - $CLI volume info $vol | grep "^$field: " | sed 's/.*: //'; -} - -TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2; -EXPECT 'Created' volinfo_field $V0 'Status'; -TEST $CLI volume set $V0 nfs.disable false - -TEST $CLI volume start $V0; -EXPECT 'Started' volinfo_field $V0 'Status'; - -MOUNTDIR=$N0; -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1 -TEST touch $N0/testfile - -TEST $CLI volume set $V0 debug.error-gen client -TEST $CLI volume set $V0 debug.error-fops stat -TEST $CLI volume set $V0 debug.error-failure 100 - -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; - -pid_file=$(read_nfs_pidfile); - -getfacl $N0/testfile 2>/dev/null - -nfs_pid=$(get_nfs_pid); -if [ ! $nfs_pid ] -then - nfs_pid=0; -fi - -TEST [ $nfs_pid -eq $pid_file ] - -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR - -cleanup; diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t deleted file mode 100755 index 61be484..0000000 --- a/tests/bugs/nfs/bug-970070.t +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -# TEST the nfs.acl option -. $(dirname $0)/../../include.rc - -cleanup -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume start $V0 -TEST $CLI volume set $V0 nfs.acl off -TEST $CLI volume set $V0 nfs.acl on -cleanup diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t deleted file mode 100755 index 975c46f..0000000 --- a/tests/bugs/nfs/bug-974972.t +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -#This script checks that nfs mount does not fail lookup on files with split-brain -cleanup; - -TEST glusterd -TEST pidof glusterd -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} -TEST $CLI volume set $V0 self-heal-daemon off -TEST $CLI volume set $V0 cluster.eager-lock off -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 -TEST touch $N0/1 -TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 -echo abc > $N0/1 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 - -TEST kill_brick ${V0} ${H0} ${B0}/${V0}0 -echo def > $N0/1 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1 - -#Lookup should not fail -TEST ls $N0/1 -TEST ! cat $N0/1 - -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 -cleanup diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t deleted file mode 100644 index f1b6859..0000000 --- a/tests/bugs/nfs/showmount-many-clients.t +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# -# The nfs.rpc-auth-allow volume option is used to generate the list of clients -# that are displayed as able to mount the export. The "group" in the export -# should be a list of all clients, identified by "name". In previous versions, -# the "name" was the copied string from nfs.rpc-auth-allow. This is not -# correct, as the volume option should be parsed and split into different -# groups. 
-# -# When the single string is passed, this testcase fails when the -# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting -# the groups into their own structures, this testcase passes. -# - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../nfs.rc -. $(dirname $0)/../../volume.rc - -cleanup - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/brick1 -EXPECT 'Created' volinfo_field $V0 'Status' -TEST $CLI volume set $V0 nfs.disable false - -CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,) -TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS} -TEST $CLI volume set $V0 nfs.rpc-auth-reject all - -TEST $CLI volume start $V0; -EXPECT 'Started' volinfo_field $V0 'Status' - -# glusterfs/nfs needs some time to start up in the background -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available - -# showmount should not timeout (no reply is sent on error) -TEST showmount -e $H0 - -cleanup diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py deleted file mode 100755 index eb507e1..0000000 --- a/tests/bugs/nfs/socket-as-fifo.py +++ /dev/null @@ -1,33 +0,0 @@ -# -# Create a unix domain socket and test if it is a socket (and not a fifo/pipe). -# -# Author: Niels de Vos -# - -from __future__ import print_function -import os -import stat -import sys -import socket - -ret = 1 - -if len(sys.argv) != 2: - print('Usage: %s ' % (sys.argv[0])) - sys.exit(ret) - -path = sys.argv[1] - -sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) -sock.bind(path) - -stbuf = os.stat(path) -mode = stbuf.st_mode - -if stat.S_ISSOCK(mode): - ret = 0 - -sock.close() -os.unlink(path) - -sys.exit(ret) diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t deleted file mode 100644 index d9b9e95..0000000 --- a/tests/bugs/nfs/socket-as-fifo.t +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock - -# this is the actual test -TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket - -TEST umount_nfs $N0 - -cleanup diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t deleted file mode 100644 index 6a11487..0000000 --- a/tests/bugs/nfs/subdir-trailing-slash.t +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# -# Verify that mounting a subdir over NFS works, even with a trailing / -# -# For example: -# mount -t nfs server.example.com:/volume/subdir/ -# - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup; -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false - -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available - -TEST mount_nfs $H0:/$V0 $N0 nolock -TEST mkdir -p $N0/subdir -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock -EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 - -cleanup diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t deleted file mode 100755 index 2a94009..0000000 --- a/tests/bugs/nfs/zero-atime.t +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# -# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If -# not set, these values are 0 and cause a atime/mtime set to the Epoch. -# - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. 
$(dirname $0)/../../nfs.rc - -#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST - -cleanup - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 $H0:$B0/$V0 -TEST $CLI volume set $V0 nfs.disable false -TEST $CLI volume start $V0 -EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST mount_nfs $H0:/$V0 $N0 nolock - -# create a file for testing -TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k - -# timezone in UTC results in atime=0 if not set correctly -TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat -TEST [ "$(stat --format=%X $M0/small)" != "0" ] - -TEST rm $M0/small - -cleanup diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t index 65af274..9ad0ab2 100755 --- a/tests/bugs/rpc/bug-954057.t +++ b/tests/bugs/rpc/bug-954057.t @@ -25,7 +25,15 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 TEST mkdir $M0/dir TEST mkdir $M0/nobody -TEST chown nfsnobody:nfsnobody $M0/nobody +grep nfsnobody /etc/passwd > /dev/null +if [ $? -eq 1 ]; then +usr=nobody +grp=nobody +else +usr=nfsnobody +grp=nfsnobody +fi +TEST chown $usr:$grp $M0/nobody TEST `echo "file" >> $M0/file` TEST cp $M0/file $M0/new TEST chmod 700 $M0/new diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t index 7628870..66e896a 100644 --- a/tests/bugs/shard/bug-1272986.t +++ b/tests/bugs/shard/bug-1272986.t @@ -16,16 +16,16 @@ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 # Write some data into a file, such that its size crosses the shard block size. -TEST dd if=/dev/zero of=$M1/file bs=1M count=5 conv=notrunc +TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct md5sum1_reader=$(md5sum $M0/file | awk '{print $1}') EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'` # Append some more data into the file. 
-TEST `echo "abcdefg" >> $M1/file` +TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct -md5sum2_reader=$(md5sum $M0/file | awk '{print $1}') +md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}') # Test to see if the reader refreshes its cache correctly as part of the reads # triggered through md5sum. If it does, then the md5sum on the reader and writer diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t index d4c0702..8070bc1 100755 --- a/tests/bugs/transport/bug-873367.t +++ b/tests/bugs/transport/bug-873367.t @@ -13,7 +13,7 @@ rm -f $SSL_BASE/glusterfs.* mkdir -p $B0/1 mkdir -p $M0 -TEST openssl genrsa -out $SSL_KEY 1024 +TEST openssl genrsa -out $SSL_KEY 2048 TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT ln $SSL_CERT $SSL_CA diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t index 3cb45b5..cae010c 100755 --- a/tests/features/ssl-authz.t +++ b/tests/features/ssl-authz.t @@ -41,7 +41,7 @@ function valid_ciphers { -e '/:$/s///' } -TEST openssl genrsa -out $SSL_KEY 1024 +TEST openssl genrsa -out $SSL_KEY 2048 TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT ln $SSL_CERT $SSL_CA diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t index 7e1e199..e4bcdf5 100644 --- a/tests/features/ssl-ciphers.t +++ b/tests/features/ssl-ciphers.t @@ -33,18 +33,26 @@ wait_mount() { openssl_connect() { ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA" ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR" - #echo openssl s_client $ssl_opt $@ > /dev/tty - #read -p "Continue? 
" nothing - CIPHER=`echo "" | - openssl s_client $ssl_opt $@ 2>/dev/null | - awk '/^ Cipher/{print $3}'` - if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" ] ; then + cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null" + CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') + if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then echo "N" else echo "Y" fi } +#Validate the cipher to pass EXPECT test case before call openssl_connect +check_cipher() { + cmd="echo "" | openssl s_client $@ 2> /dev/null" + cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}') + if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then + echo "N" + else + echo "Y" + fi +} + cleanup; mkdir -p $B0 mkdir -p $M0 @@ -65,7 +73,7 @@ TEST glusterd TEST pidof glusterd TEST $CLI volume info; -TEST openssl genrsa -out $SSL_KEY 1024 2>/dev/null +TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \ -subj /CN=CA -out $SSL_CA TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \ @@ -106,28 +114,36 @@ EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT EXPECT "N" openssl_connect -tls1 -connect $H0:$BRICK_PORT # Test a HIGH CBC cipher -EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT # Test EECDH -EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT # test MD5 fails -EXPECT "N" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT # test 
RC4 fails -EXPECT "N" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT # test eNULL fails -EXPECT "N" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT # test SHA2 -EXPECT "Y" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT # test GCM -EXPECT "Y" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT # Test DH fails without DH params -EXPECT "N" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT # Test DH with DH params TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem @@ -145,8 +161,10 @@ TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count BRICK_PORT=`brick_port $V0` -EXPECT "Y" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT -EXPECT "N" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT # Test the ec-curve option TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1 @@ -155,8 +173,10 @@ TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN 
$CHILD_UP_TIMEOUT "1" online_brick_count BRICK_PORT=`brick_port $V0` -EXPECT "N" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT -EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT TEST $CLI volume set $V0 ssl.ec-curve invalid EXPECT invalid volume_option $V0 ssl.ec-curve @@ -164,7 +184,8 @@ TEST $CLI volume stop $V0 TEST $CLI volume start $V0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count BRICK_PORT=`brick_port $V0` -EXPECT "N" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT +cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT` +EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT TEST $CLI volume set $V0 ssl.ec-curve secp521r1 EXPECT secp521r1 volume_option $V0 ssl.ec-curve diff --git a/tests/ssl.rc b/tests/ssl.rc index 127f83f..b1ccc4c 100644 --- a/tests/ssl.rc +++ b/tests/ssl.rc @@ -20,7 +20,7 @@ SSL_CA=$SSL_BASE/glusterfs.ca # Create self-signed certificates function create_self_signed_certs (){ - openssl genrsa -out $SSL_KEY 1024 + openssl genrsa -out $SSL_KEY 2048 openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT ln $SSL_CERT $SSL_CA return $? 
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index b248767..b224abd 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -10,6883 +10,6417 @@ #include -#include "shard.h" #include "shard-mem-types.h" +#include "shard.h" #include #include #include -static gf_boolean_t -__is_shard_dir(uuid_t gfid) -{ - shard_priv_t *priv = THIS->private; +static gf_boolean_t __is_shard_dir(uuid_t gfid) { + shard_priv_t *priv = THIS->private; - if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) - return _gf_true; + if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0) + return _gf_true; - return _gf_false; + return _gf_false; } -static gf_boolean_t -__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) -{ - if (frame->root->pid == GF_CLIENT_PID_GSYNCD && - (__is_shard_dir(loc->pargfid) || - (loc->parent && __is_shard_dir(loc->parent->gfid)))) - return _gf_true; +static gf_boolean_t __is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc) { + if (frame->root->pid == GF_CLIENT_PID_GSYNCD && + (__is_shard_dir(loc->pargfid) || + (loc->parent && __is_shard_dir(loc->parent->gfid)))) + return _gf_true; - return _gf_false; + return _gf_false; } -void -shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) -{ - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; +void shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len) { + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse(gfid, gfid_str); - snprintf(buf, len, "%s.%d", gfid_str, block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(buf, len, "%s.%d", gfid_str, block_num); } -void -shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len) -{ - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; +void shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, + size_t len) { + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; - gf_uuid_unparse(gfid, gfid_str); - snprintf(filepath, len, 
"/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); + gf_uuid_unparse(gfid, gfid_str); + snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num); } -int -__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx_p = NULL; +int __shard_inode_ctx_get(inode_t *inode, xlator_t *this, + shard_inode_ctx_t **ctx) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx_p = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret == 0) { - *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - return ret; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret == 0) { + *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + return ret; + } - ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); - if (!ctx_p) - return ret; + ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t); + if (!ctx_p) + return ret; - INIT_LIST_HEAD(&ctx_p->ilist); - INIT_LIST_HEAD(&ctx_p->to_fsync_list); + INIT_LIST_HEAD(&ctx_p->ilist); + INIT_LIST_HEAD(&ctx_p->to_fsync_list); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); - if (ret < 0) { - GF_FREE(ctx_p); - return ret; - } + ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + if (ret < 0) { + GF_FREE(ctx_p); + return ret; + } - *ctx = ctx_p; + *ctx = ctx_p; - return ret; + return ret; } -int -shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx) -{ - int ret = 0; +int shard_inode_ctx_get(inode_t *inode, xlator_t *this, + shard_inode_ctx_t **ctx) { + int ret = 0; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_get(inode, this, ctx); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_get(inode, this, ctx); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) -{ - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int 
__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if (valid & SHARD_MASK_BLOCK_SIZE) - ctx->block_size = block_size; + if (valid & SHARD_MASK_BLOCK_SIZE) + ctx->block_size = block_size; - if (valid & SHARD_MASK_PROT) - ctx->stat.ia_prot = stbuf->ia_prot; + if (valid & SHARD_MASK_PROT) + ctx->stat.ia_prot = stbuf->ia_prot; - if (valid & SHARD_MASK_NLINK) - ctx->stat.ia_nlink = stbuf->ia_nlink; + if (valid & SHARD_MASK_NLINK) + ctx->stat.ia_nlink = stbuf->ia_nlink; - if (valid & SHARD_MASK_UID) - ctx->stat.ia_uid = stbuf->ia_uid; + if (valid & SHARD_MASK_UID) + ctx->stat.ia_uid = stbuf->ia_uid; - if (valid & SHARD_MASK_GID) - ctx->stat.ia_gid = stbuf->ia_gid; + if (valid & SHARD_MASK_GID) + ctx->stat.ia_gid = stbuf->ia_gid; - if (valid & SHARD_MASK_SIZE) - ctx->stat.ia_size = stbuf->ia_size; + if (valid & SHARD_MASK_SIZE) + ctx->stat.ia_size = stbuf->ia_size; - if (valid & SHARD_MASK_BLOCKS) - ctx->stat.ia_blocks = stbuf->ia_blocks; + if (valid & SHARD_MASK_BLOCKS) + ctx->stat.ia_blocks = stbuf->ia_blocks; - if (valid & SHARD_MASK_TIMES) { - SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, - stbuf->ia_mtime, stbuf->ia_mtime_nsec); - SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, - stbuf->ia_ctime, stbuf->ia_ctime_nsec); - SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, - stbuf->ia_atime, stbuf->ia_atime_nsec); - } + if (valid & SHARD_MASK_TIMES) { + SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec, + stbuf->ia_mtime, stbuf->ia_mtime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec, + stbuf->ia_ctime, stbuf->ia_ctime_nsec); + SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec, + stbuf->ia_atime, stbuf->ia_atime_nsec); 
+ } - if (valid & SHARD_MASK_OTHERS) { - ctx->stat.ia_ino = stbuf->ia_ino; - gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); - ctx->stat.ia_dev = stbuf->ia_dev; - ctx->stat.ia_type = stbuf->ia_type; - ctx->stat.ia_rdev = stbuf->ia_rdev; - ctx->stat.ia_blksize = stbuf->ia_blksize; - } + if (valid & SHARD_MASK_OTHERS) { + ctx->stat.ia_ino = stbuf->ia_ino; + gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid); + ctx->stat.ia_dev = stbuf->ia_dev; + ctx->stat.ia_type = stbuf->ia_type; + ctx->stat.ia_rdev = stbuf->ia_rdev; + ctx->stat.ia_blksize = stbuf->ia_blksize; + } - if (valid & SHARD_MASK_REFRESH_RESET) - ctx->refresh = _gf_false; + if (valid & SHARD_MASK_REFRESH_RESET) + ctx->refresh = _gf_false; - return 0; + return 0; } -int -shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, - uint64_t block_size, int32_t valid) -{ - int ret = -1; +int shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf, + uint64_t block_size, int32_t valid) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) -{ - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - ctx->refresh = _gf_true; + ctx->refresh = _gf_true; - return 0; + return 0; } -int -shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) -{ - int ret = -1; +int shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this) { + int ret = -1; - LOCK(&inode->lock); - { - ret = 
__shard_inode_ctx_set_refresh_flag(inode, this); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_set_refresh_flag(inode, this); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) -{ - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - ctx->refreshed = _gf_true; - return 0; + ctx->refreshed = _gf_true; + return 0; } -int -shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) -{ - int ret = -1; +int shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_mark_dir_refreshed(inode, this); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, - inode_t *shard_inode) -{ - int ret = -1; - shard_inode_ctx_t *base_ictx = NULL; - shard_inode_ctx_t *shard_ictx = NULL; - - ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (ret) - return ret; +int __shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) { + int ret = -1; + shard_inode_ctx_t *base_ictx = NULL; + shard_inode_ctx_t *shard_ictx = NULL; - ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (ret) + return ret; - if (shard_ictx->fsync_needed) { - shard_ictx->fsync_needed++; - return 1; - } + ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx); + if (ret) + return ret; - 
list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); - shard_ictx->inode = shard_inode; + if (shard_ictx->fsync_needed) { shard_ictx->fsync_needed++; - base_ictx->fsync_count++; - shard_ictx->base_inode = base_inode; + return 1; + } - return 0; + list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); + shard_ictx->inode = shard_inode; + shard_ictx->fsync_needed++; + base_ictx->fsync_count++; + shard_ictx->base_inode = base_inode; + + return 0; } -int -shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, - inode_t *shard_inode) -{ - int ret = -1; +int shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this, + inode_t *shard_inode) { + int ret = -1; - /* This ref acts as a refkeepr on the base inode. We - * need to keep this inode alive as it holds the head - * of the to_fsync_list. - */ - inode_ref(base_inode); - inode_ref(shard_inode); + /* This ref acts as a refkeepr on the base inode. We + * need to keep this inode alive as it holds the head + * of the to_fsync_list. + */ + inode_ref(base_inode); + inode_ref(shard_inode); - LOCK(&base_inode->lock); - LOCK(&shard_inode->lock); - { - ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, - shard_inode); - } - UNLOCK(&shard_inode->lock); - UNLOCK(&base_inode->lock); + LOCK(&base_inode->lock); + LOCK(&shard_inode->lock); + { ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this, shard_inode); } + UNLOCK(&shard_inode->lock); + UNLOCK(&base_inode->lock); - /* Unref the base inode corresponding to the ref above, if the shard is - * found to be already part of the fsync list. - */ - if (ret != 0) { - inode_unref(base_inode); - inode_unref(shard_inode); - } - return ret; + /* Unref the base inode corresponding to the ref above, if the shard is + * found to be already part of the fsync list. 
+ */ + if (ret != 0) { + inode_unref(base_inode); + inode_unref(shard_inode); + } + return ret; } -gf_boolean_t -__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) -{ - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +gf_boolean_t __shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - /* If inode ctx get fails, better to err on the side of caution and - * try again? Unless the failure is due to mem-allocation. - */ - if (ret) - return _gf_true; + ret = __shard_inode_ctx_get(inode, this, &ctx); + /* If inode ctx get fails, better to err on the side of caution and + * try again? Unless the failure is due to mem-allocation. + */ + if (ret) + return _gf_true; - return !ctx->refreshed; + return !ctx->refreshed; } -gf_boolean_t -shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) -{ - gf_boolean_t flag = _gf_false; +gf_boolean_t shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this) { + gf_boolean_t flag = _gf_false; - LOCK(&inode->lock); - { - flag = __shard_inode_ctx_needs_lookup(inode, this); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { flag = __shard_inode_ctx_needs_lookup(inode, this); } + UNLOCK(&inode->lock); - return flag; + return flag; } -int -__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) -{ - int ret = -1; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, + struct iatt *stbuf) { + int ret = -1; + shard_inode_ctx_t *ctx = NULL; - ret = __shard_inode_ctx_get(inode, this, &ctx); - if (ret) - return ret; + ret = __shard_inode_ctx_get(inode, this, &ctx); + if (ret) + return ret; - if ((stbuf->ia_size != ctx->stat.ia_size) || - (stbuf->ia_blocks != ctx->stat.ia_blocks)) - ctx->refresh = _gf_true; + if ((stbuf->ia_size != ctx->stat.ia_size) || + (stbuf->ia_blocks != ctx->stat.ia_blocks)) + ctx->refresh = _gf_true; - return 0; + return 0; } 
-int -shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf) -{ - int ret = -1; +int shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, + struct iatt *stbuf) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_invalidate(inode, this, stbuf); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_invalidate(inode, this, stbuf); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, - uint64_t *block_size) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - *block_size = ctx->block_size; + *block_size = ctx->block_size; - return 0; + return 0; } -int -shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, - uint64_t *block_size) -{ - int ret = -1; +int shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this, + uint64_t *block_size) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_get_block_size(inode, this, block_size); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_get_block_size(inode, this, block_size); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, - int *fsync_count) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = 
__inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - *fsync_count = ctx->fsync_needed; + *fsync_count = ctx->fsync_needed; - return 0; + return 0; } -int -shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, - int *fsync_count) -{ - int ret = -1; +int shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this, + int *fsync_count) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count); } + UNLOCK(&inode->lock); - return ret; + return ret; } -int -__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); - return 0; + memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t)); + return 0; } -int -shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, - shard_inode_ctx_t *ctx_out) -{ - int ret = -1; +int shard_inode_ctx_get_all(inode_t *inode, xlator_t *this, + shard_inode_ctx_t *ctx_out) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_get_all(inode, this, ctx_out); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { ret = __shard_inode_ctx_get_all(inode, this, ctx_out); } + UNLOCK(&inode->lock); - 
return ret; + return ret; } -int -__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int __shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - if (ctx->refresh == _gf_false) - *buf = ctx->stat; - else - *need_refresh = _gf_true; + if (ctx->refresh == _gf_false) + *buf = ctx->stat; + else + *need_refresh = _gf_true; - return 0; + return 0; } -int -shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, - struct iatt *buf, - gf_boolean_t *need_refresh) -{ - int ret = -1; +int shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this, + struct iatt *buf, + gf_boolean_t *need_refresh) { + int ret = -1; - LOCK(&inode->lock); - { - ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, - need_refresh); - } - UNLOCK(&inode->lock); + LOCK(&inode->lock); + { + ret = + __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf, need_refresh); + } + UNLOCK(&inode->lock); - return ret; + return ret; } -void -shard_local_wipe(shard_local_t *local) -{ - int i = 0; - int count = 0; - - count = local->num_blocks; - - syncbarrier_destroy(&local->barrier); - loc_wipe(&local->loc); - loc_wipe(&local->dot_shard_loc); - loc_wipe(&local->dot_shard_rm_loc); - loc_wipe(&local->loc2); - loc_wipe(&local->tmp_loc); - loc_wipe(&local->int_inodelk.loc); - loc_wipe(&local->int_entrylk.loc); - loc_wipe(&local->newloc); - - if (local->int_entrylk.basename) - GF_FREE(local->int_entrylk.basename); - if (local->fd) - 
fd_unref(local->fd); - - if (local->xattr_req) - dict_unref(local->xattr_req); - if (local->xattr_rsp) - dict_unref(local->xattr_rsp); - - for (i = 0; i < count; i++) { - if (!local->inode_list) - break; - - if (local->inode_list[i]) - inode_unref(local->inode_list[i]); - } - - GF_FREE(local->inode_list); - - GF_FREE(local->vector); - if (local->iobref) - iobref_unref(local->iobref); - if (local->list_inited) - gf_dirent_free(&local->entries_head); - if (local->inodelk_frame) - SHARD_STACK_DESTROY(local->inodelk_frame); - if (local->entrylk_frame) - SHARD_STACK_DESTROY(local->entrylk_frame); -} - -int -shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) -{ - int ret = -1; - void *size_attr = NULL; - uint64_t size_array[4]; - - ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); - if (ret) { - gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, - SHARD_MSG_INTERNAL_XATTR_MISSING, - "Failed to " - "get " GF_XATTR_SHARD_FILE_SIZE " for %s", - uuid_utoa(stbuf->ia_gfid)); - return ret; - } +void shard_local_wipe(shard_local_t *local) { + int i = 0; + int count = 0; - memcpy(size_array, size_attr, sizeof(size_array)); + count = local->num_blocks; - stbuf->ia_size = ntoh64(size_array[0]); - stbuf->ia_blocks = ntoh64(size_array[2]); + syncbarrier_destroy(&local->barrier); + loc_wipe(&local->loc); + loc_wipe(&local->dot_shard_loc); + loc_wipe(&local->dot_shard_rm_loc); + loc_wipe(&local->loc2); + loc_wipe(&local->tmp_loc); + loc_wipe(&local->int_inodelk.loc); + loc_wipe(&local->int_entrylk.loc); + loc_wipe(&local->newloc); - return 0; -} + if (local->int_entrylk.basename) + GF_FREE(local->int_entrylk.basename); + if (local->fd) + fd_unref(local->fd); -int -shard_call_count_return(call_frame_t *frame) -{ - int call_count = 0; - shard_local_t *local = NULL; + if (local->xattr_req) + dict_unref(local->xattr_req); + if (local->xattr_rsp) + dict_unref(local->xattr_rsp); - local = frame->local; + for (i = 0; i < count; i++) { + if (!local->inode_list) + 
break; + + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } + + GF_FREE(local->inode_list); + + GF_FREE(local->vector); + if (local->iobref) + iobref_unref(local->iobref); + if (local->list_inited) + gf_dirent_free(&local->entries_head); + if (local->inodelk_frame) + SHARD_STACK_DESTROY(local->inodelk_frame); + if (local->entrylk_frame) + SHARD_STACK_DESTROY(local->entrylk_frame); +} + +int shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict) { + int ret = -1; + void *size_attr = NULL; + uint64_t size_array[4]; + + ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, + SHARD_MSG_INTERNAL_XATTR_MISSING, + "Failed to " + "get " GF_XATTR_SHARD_FILE_SIZE " for %s", + uuid_utoa(stbuf->ia_gfid)); + return ret; + } + + memcpy(size_array, size_attr, sizeof(size_array)); + + stbuf->ia_size = ntoh64(size_array[0]); + stbuf->ia_blocks = ntoh64(size_array[2]); + + return 0; +} + +int shard_call_count_return(call_frame_t *frame) { + int call_count = 0; + shard_local_t *local = NULL; + + local = frame->local; + + LOCK(&frame->lock); + { call_count = --local->call_count; } + UNLOCK(&frame->lock); + + return call_count; +} + +static char *shard_internal_dir_string(shard_internal_dir_type_t type) { + char *str = NULL; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + str = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + str = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + return str; +} + +static int shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) { + int ret = -1; + char *bname = NULL; + inode_t *parent = NULL; + loc_t *internal_dir_loc = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + if (!local) + return -1; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + internal_dir_loc = &local->dot_shard_loc; + bname = GF_SHARD_DIR; + parent = 
inode_ref(this->itable->root); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + internal_dir_loc = &local->dot_shard_rm_loc; + bname = GF_SHARD_REMOVE_ME_DIR; + parent = inode_ref(priv->dot_shard_inode); + break; + default: + break; + } + + internal_dir_loc->inode = inode_new(this->itable); + internal_dir_loc->parent = parent; + ret = inode_path(internal_dir_loc->parent, bname, + (char **)&internal_dir_loc->path); + if (ret < 0 || !(internal_dir_loc->inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", bname); + goto out; + } + + internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); + if (internal_dir_loc->name) + internal_dir_loc->name++; + + ret = 0; +out: + return ret; +} + +inode_t *__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, + inode_t *base_inode, int block_num, + uuid_t gfid) { + char block_bname[256] = { + 0, + }; + inode_t *lru_inode = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *lru_inode_ctx = NULL; + shard_inode_ctx_t *lru_base_inode_ctx = NULL; + inode_t *fsync_inode = NULL; + inode_t *lru_base_inode = NULL; + gf_boolean_t do_fsync = _gf_false; + + priv = this->private; + + shard_inode_ctx_get(linked_inode, this, &ctx); + + if (list_empty(&ctx->ilist)) { + if (priv->inode_count + 1 <= priv->lru_limit) { + /* If this inode was linked here for the first time (indicated + * by empty list), and if there is still space in the priv list, + * add this ctx to the tail of the list. + */ + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. 
+ */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + list_add_tail(&ctx->ilist, &priv->ilist_head); + priv->inode_count++; + ctx->base_inode = inode_ref(base_inode); + } else { + /*If on the other hand there is no available slot for this inode + * in the list, delete the lru inode from the head of the list, + * unlink it. And in its place add this new inode into the list. + */ + lru_inode_ctx = + list_first_entry(&priv->ilist_head, shard_inode_ctx_t, ilist); + GF_ASSERT(lru_inode_ctx->block_num > 0); + lru_base_inode = lru_inode_ctx->base_inode; + list_del_init(&lru_inode_ctx->ilist); + lru_inode = inode_find(linked_inode->table, lru_inode_ctx->stat.ia_gfid); + /* If the lru inode was part of the pending-fsync list, + * the base inode needs to be unref'd, the lru inode + * deleted from fsync list and fsync'd in a new frame, + * and then unlinked in memory and forgotten. + */ + if (!lru_base_inode) + goto after_fsync_check; + LOCK(&lru_base_inode->lock); + LOCK(&lru_inode->lock); + { + if (!list_empty(&lru_inode_ctx->to_fsync_list)) { + list_del_init(&lru_inode_ctx->to_fsync_list); + lru_inode_ctx->fsync_needed = 0; + do_fsync = _gf_true; + __shard_inode_ctx_get(lru_base_inode, this, &lru_base_inode_ctx); + lru_base_inode_ctx->fsync_count--; + } + } + UNLOCK(&lru_inode->lock); + UNLOCK(&lru_base_inode->lock); + + after_fsync_check: + if (!do_fsync) { + shard_make_block_bname(lru_inode_ctx->block_num, + lru_inode_ctx->base_gfid, block_bname, + sizeof(block_bname)); + /* The following unref corresponds to the ref held at + * the time the shard was added to the lru list. + */ + inode_unref(lru_inode); + inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); + inode_forget(lru_inode, 0); + } else { + /* The following unref corresponds to the ref + * held when the shard was added to fsync list. 
+ */ + inode_unref(lru_inode); + fsync_inode = lru_inode; + if (lru_base_inode) + inode_unref(lru_base_inode); + } + /* The following unref corresponds to the ref + * held by inode_find() above. + */ + inode_unref(lru_inode); + + /* The following unref corresponds to the ref held on the base shard + * at the time of adding shard inode to lru list + */ + if (lru_base_inode) + inode_unref(lru_base_inode); + + /* For as long as an inode is in lru list, we try to + * keep it alive by holding a ref on it. + */ + inode_ref(linked_inode); + if (base_inode) + gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); + ctx->block_num = block_num; + ctx->base_inode = inode_ref(base_inode); + list_add_tail(&ctx->ilist, &priv->ilist_head); + } + } else { + /* If this is not the first time this inode is being operated on, move + * it to the most recently used end of the list. + */ + list_move_tail(&ctx->ilist, &priv->ilist_head); + } + return fsync_inode; +} + +int shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, + int32_t op_ret, int32_t op_errno) { + switch (fop) { + case GF_FOP_LOOKUP: + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL, NULL); + break; + case GF_FOP_STAT: + SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSTAT: + SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_TRUNCATE: + SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_FTRUNCATE: + SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_MKNOD: + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + break; + case GF_FOP_LINK: + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL); + break; + case GF_FOP_CREATE: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + break; 
+ case GF_FOP_UNLINK: + SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_RENAME: + SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, + NULL, NULL); + break; + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_ZEROFILL: + SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_READ: + SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, + NULL); + break; + case GF_FOP_FSYNC: + SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_REMOVEXATTR: + SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FREMOVEXATTR: + SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_FGETXATTR: + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_GETXATTR: + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); + break; + case GF_FOP_FSETXATTR: + SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETXATTR: + SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); + break; + case GF_FOP_SETATTR: + SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_FSETATTR: + SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, NULL); + break; + case GF_FOP_SEEK: + SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; +} + +int shard_common_inode_write_success_unwind(glusterfs_fop_t fop, + call_frame_t 
*frame, + int32_t op_ret) { + shard_local_t *local = NULL; + + local = frame->local; + + switch (fop) { + case GF_FOP_WRITE: + SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_FALLOCATE: + SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_ZEROFILL: + SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + case GF_FOP_DISCARD: + SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, + &local->postbuf, local->xattr_rsp); + break; + default: + gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; +} + +int shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { + char block_bname[256] = { + 0, + }; + fd_t *anon_fd = cookie; + inode_t *shard_inode = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + + if (anon_fd == NULL || op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, + "fsync failed on shard"); + goto out; + } + shard_inode = anon_fd->inode; + + LOCK(&priv->lock); + LOCK(&shard_inode->lock); + { + __shard_inode_ctx_get(shard_inode, this, &ctx); + if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { + shard_make_block_bname(ctx->block_num, shard_inode->gfid, block_bname, + sizeof(block_bname)); + inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); + /* The following unref corresponds to the ref held by + * inode_link() at the time the shard was created or + * looked up + */ + inode_unref(shard_inode); + inode_forget(shard_inode, 0); + } + } + UNLOCK(&shard_inode->lock); + UNLOCK(&priv->lock); - LOCK(&frame->lock); - { - call_count = --local->call_count; +out: + if 
(anon_fd) + fd_unref(anon_fd); + STACK_DESTROY(frame->root); + return 0; +} + +int shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) { + fd_t *anon_fd = NULL; + call_frame_t *fsync_frame = NULL; + + fsync_frame = create_frame(this, this->ctx->pool); + if (!fsync_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to fsync shard"); + return -1; + } + + anon_fd = fd_anonymous(inode); + if (!anon_fd) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create anon fd to" + " fsync shard"); + STACK_DESTROY(fsync_frame->root); + return -1; + } + + STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, + 1, NULL); + return 0; +} + +int shard_common_resolve_shards( + call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler) { + int i = -1; + uint32_t shard_idx_iter = 0; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *res_inode = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + local->call_count = 0; + shard_idx_iter = local->first_block; + res_inode = local->resolver_base_inode; + if (res_inode) + gf_uuid_copy(gfid, res_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + if ((local->op_ret < 0) || (local->resolve_not)) + goto out; + + while (shard_idx_iter <= local->last_block) { + i++; + if (shard_idx_iter == 0) { + local->inode_list[i] = inode_ref(res_inode); + shard_idx_iter++; + continue; + } + + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + inode = NULL; + inode = inode_resolve(this->itable, path); + if (inode) { + gf_msg_debug(this->name, 0, "Shard %d already " + "present. gfid=%s. 
Saving inode for future.", + shard_idx_iter, uuid_utoa(inode->gfid)); + local->inode_list[i] = inode; + /* Let the ref on the inodes that are already present + * in inode table still be held so that they don't get + * forgotten by the time the fop reaches the actual + * write stage. + */ + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list(inode, this, res_inode, + shard_idx_iter, gfid); + } + UNLOCK(&priv->lock); + shard_idx_iter++; + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); + continue; + } else { + local->call_count++; + shard_idx_iter++; } - UNLOCK(&frame->lock); + } +out: + post_res_handler(frame, this); + return 0; +} + +int shard_update_file_size_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict, dict_t *xdata) { + inode_t *inode = NULL; + shard_local_t *local = NULL; + + local = frame->local; + + if ((local->fd) && (local->fd->inode)) + inode = local->fd->inode; + else if (local->loc.inode) + inode = local->loc.inode; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_UPDATE_FILE_SIZE_FAILED, "Update to file size" + " xattr failed on %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } - return call_count; + if (shard_modify_size_and_block_count(&local->postbuf, dict)) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } +err: + local->post_update_size_handler(frame, this); + return 0; } -static char * -shard_internal_dir_string(shard_internal_dir_type_t type) -{ - char *str = NULL; +int shard_set_size_attrs(int64_t size, int64_t block_count, + int64_t **size_attr_p) { + int ret = -1; + int64_t *size_attr = NULL; - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - str = GF_SHARD_DIR; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - str = GF_SHARD_REMOVE_ME_DIR; - break; - default: - break; - } - return str; -} - -static int 
-shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local, - shard_internal_dir_type_t type) -{ - int ret = -1; - char *bname = NULL; - inode_t *parent = NULL; - loc_t *internal_dir_loc = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - if (!local) - return -1; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - internal_dir_loc = &local->dot_shard_loc; - bname = GF_SHARD_DIR; - parent = inode_ref(this->itable->root); - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - internal_dir_loc = &local->dot_shard_rm_loc; - bname = GF_SHARD_REMOVE_ME_DIR; - parent = inode_ref(priv->dot_shard_inode); - break; - default: - break; - } + if (!size_attr_p) + goto out; - internal_dir_loc->inode = inode_new(this->itable); - internal_dir_loc->parent = parent; - ret = inode_path(internal_dir_loc->parent, bname, - (char **)&internal_dir_loc->path); - if (ret < 0 || !(internal_dir_loc->inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", bname); - goto out; - } + size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); + if (!size_attr) + goto out; - internal_dir_loc->name = strrchr(internal_dir_loc->path, '/'); - if (internal_dir_loc->name) - internal_dir_loc->name++; + size_attr[0] = hton64(size); + /* As sharding evolves, it _may_ be necessary to embed more pieces of + * information within the same xattr. So allocating slots for them in + * advance. For now, only bytes 0-63 and 128-191 which would make up the + * current size and block count respectively of the file are valid. 
+ */ + size_attr[2] = hton64(block_count); - ret = 0; + *size_attr_p = size_attr; + + ret = 0; out: - return ret; + return ret; } -inode_t * -__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, - inode_t *base_inode, int block_num, - uuid_t gfid) -{ - char block_bname[256] = { - 0, - }; - inode_t *lru_inode = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *lru_inode_ctx = NULL; - shard_inode_ctx_t *lru_base_inode_ctx = NULL; - inode_t *fsync_inode = NULL; - inode_t *lru_base_inode = NULL; - gf_boolean_t do_fsync = _gf_false; - - priv = this->private; - - shard_inode_ctx_get(linked_inode, this, &ctx); - - if (list_empty(&ctx->ilist)) { - if (priv->inode_count + 1 <= priv->lru_limit) { - /* If this inode was linked here for the first time (indicated - * by empty list), and if there is still space in the priv list, - * add this ctx to the tail of the list. - */ - /* For as long as an inode is in lru list, we try to - * keep it alive by holding a ref on it. - */ - inode_ref(linked_inode); - if (base_inode) - gf_uuid_copy(ctx->base_gfid, base_inode->gfid); - else - gf_uuid_copy(ctx->base_gfid, gfid); - ctx->block_num = block_num; - list_add_tail(&ctx->ilist, &priv->ilist_head); - priv->inode_count++; - ctx->base_inode = inode_ref(base_inode); - } else { - /*If on the other hand there is no available slot for this inode - * in the list, delete the lru inode from the head of the list, - * unlink it. And in its place add this new inode into the list. 
- */ - lru_inode_ctx = list_first_entry(&priv->ilist_head, - shard_inode_ctx_t, ilist); - GF_ASSERT(lru_inode_ctx->block_num > 0); - lru_base_inode = lru_inode_ctx->base_inode; - list_del_init(&lru_inode_ctx->ilist); - lru_inode = inode_find(linked_inode->table, - lru_inode_ctx->stat.ia_gfid); - /* If the lru inode was part of the pending-fsync list, - * the base inode needs to be unref'd, the lru inode - * deleted from fsync list and fsync'd in a new frame, - * and then unlinked in memory and forgotten. - */ - if (!lru_base_inode) - goto after_fsync_check; - LOCK(&lru_base_inode->lock); - LOCK(&lru_inode->lock); - { - if (!list_empty(&lru_inode_ctx->to_fsync_list)) { - list_del_init(&lru_inode_ctx->to_fsync_list); - lru_inode_ctx->fsync_needed = 0; - do_fsync = _gf_true; - __shard_inode_ctx_get(lru_base_inode, this, - &lru_base_inode_ctx); - lru_base_inode_ctx->fsync_count--; - } - } - UNLOCK(&lru_inode->lock); - UNLOCK(&lru_base_inode->lock); - - after_fsync_check: - if (!do_fsync) { - shard_make_block_bname(lru_inode_ctx->block_num, - lru_inode_ctx->base_gfid, block_bname, - sizeof(block_bname)); - /* The following unref corresponds to the ref held at - * the time the shard was added to the lru list. - */ - inode_unref(lru_inode); - inode_unlink(lru_inode, priv->dot_shard_inode, block_bname); - inode_forget(lru_inode, 0); - } else { - /* The following unref corresponds to the ref - * held when the shard was added to fsync list. - */ - inode_unref(lru_inode); - fsync_inode = lru_inode; - if (lru_base_inode) - inode_unref(lru_base_inode); - } - /* The following unref corresponds to the ref - * held by inode_find() above. - */ - inode_unref(lru_inode); - - /* The following unref corresponds to the ref held on the base shard - * at the time of adding shard inode to lru list - */ - if (lru_base_inode) - inode_unref(lru_base_inode); - - /* For as long as an inode is in lru list, we try to - * keep it alive by holding a ref on it. 
- */ - inode_ref(linked_inode); - if (base_inode) - gf_uuid_copy(ctx->base_gfid, base_inode->gfid); - else - gf_uuid_copy(ctx->base_gfid, gfid); - ctx->block_num = block_num; - ctx->base_inode = inode_ref(base_inode); - list_add_tail(&ctx->ilist, &priv->ilist_head); - } - } else { - /* If this is not the first time this inode is being operated on, move - * it to the most recently used end of the list. - */ - list_move_tail(&ctx->ilist, &priv->ilist_head); - } - return fsync_inode; -} +int shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, + loc_t *loc, + shard_post_update_size_fop_handler_t handler) { + int ret = -1; + int64_t *size_attr = NULL; + int64_t delta_blocks = 0; + inode_t *inode = NULL; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; -int -shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame, - int32_t op_ret, int32_t op_errno) -{ - switch (fop) { - case GF_FOP_LOOKUP: - SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, - NULL, NULL); - break; - case GF_FOP_STAT: - SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_FSTAT: - SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_TRUNCATE: - SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_FTRUNCATE: - SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_MKNOD: - SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, - NULL, NULL); - break; - case GF_FOP_LINK: - SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, - NULL, NULL); - break; - case GF_FOP_CREATE: - SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, - NULL, NULL, NULL, NULL); - break; - case GF_FOP_UNLINK: - SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_RENAME: - SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, - NULL, 
NULL, NULL, NULL); - break; - case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_READ: - SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL, - NULL, NULL); - break; - case GF_FOP_FSYNC: - SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_REMOVEXATTR: - SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_FREMOVEXATTR: - SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_FGETXATTR: - SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_GETXATTR: - SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL); - break; - case GF_FOP_FSETXATTR: - SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_SETXATTR: - SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL); - break; - case GF_FOP_SETATTR: - SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_FSETATTR: - SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL, - NULL); - break; - case GF_FOP_SEEK: - SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL); - break; - default: - gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; - } - return 0; -} + local = frame->local; + local->post_update_size_handler = handler; -int -shard_common_inode_write_success_unwind(glusterfs_fop_t fop, - call_frame_t *frame, int32_t op_ret) -{ - shard_local_t *local = NULL; + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret 
= -1; + local->op_errno = ENOMEM; + goto out; + } + + if (fd) + inode = fd->inode; + else + inode = loc->inode; + + /* If both size and block count have not changed, then skip the xattrop. + */ + delta_blocks = GF_ATOMIC_GET(local->delta_blocks); + if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { + goto out; + } + + ret = shard_set_size_attrs(local->delta_size + local->hole_size, delta_blocks, + &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, + "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict. gfid=%s", GF_XATTR_SHARD_FILE_SIZE, + uuid_utoa(inode->gfid)); + GF_FREE(size_attr); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } - local = frame->local; + if (fd) + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, GF_XATTROP_ADD_ARRAY64, + xattr_req, NULL); + else + STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, GF_XATTROP_ADD_ARRAY64, + xattr_req, NULL); - switch (fop) { - case GF_FOP_WRITE: - SHARD_STACK_UNWIND(writev, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_FALLOCATE: - SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_ZEROFILL: - SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - case GF_FOP_DISCARD: - SHARD_STACK_UNWIND(discard, frame, op_ret, 0, &local->prebuf, - &local->postbuf, local->xattr_rsp); - break; - default: - gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; 
- } - return 0; -} + dict_unref(xattr_req); + return 0; -int -shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - char block_bname[256] = { - 0, - }; - fd_t *anon_fd = cookie; - inode_t *shard_inode = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_priv_t *priv = NULL; +out: + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; +} + +static inode_t *shard_link_internal_dir_inode(shard_local_t *local, + inode_t *inode, struct iatt *buf, + shard_internal_dir_type_t type) { + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + char *bname = NULL; + inode_t **priv_inode = NULL; + inode_t *parent = NULL; + + priv = THIS->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + bname = GF_SHARD_DIR; + priv_inode = &priv->dot_shard_inode; + parent = inode->table->root; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + bname = GF_SHARD_REMOVE_ME_DIR; + priv_inode = &priv->dot_shard_rm_inode; + parent = priv->dot_shard_inode; + break; + default: + break; + } + + linked_inode = inode_link(inode, parent, bname, buf); + inode_lookup(linked_inode); + *priv_inode = linked_inode; + return linked_inode; +} + +int shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { + shard_local_t *local = NULL; + inode_t *linked_inode = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + + local = frame->local; + + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } + + /* To-Do: Fix refcount increment per call to + * shard_link_internal_dir_inode(). 
+ */ + linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); + shard_inode_ctx_mark_dir_refreshed(linked_inode, this); +out: + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; +} + +int shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, + shard_internal_dir_type_t type) { + loc_t loc = { + 0, + }; + inode_t *inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = frame->local; + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(gfid, priv->dot_shard_gfid); + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + break; + default: + break; + } + + inode = inode_find(this->itable, gfid); + + if (!shard_inode_ctx_needs_lookup(inode, this)) { + local->op_ret = 0; + goto out; + } - priv = this->private; + /* Plain assignment because the ref is already taken above through + * call to inode_find() + */ + loc.inode = inode; + gf_uuid_copy(loc.gfid, gfid); - if (anon_fd == NULL || op_ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED, - "fsync failed on shard"); - goto out; - } - shard_inode = anon_fd->inode; + STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, + NULL); + loc_wipe(&loc); - LOCK(&priv->lock); - LOCK(&shard_inode->lock); - { - __shard_inode_ctx_get(shard_inode, this, &ctx); - if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) { - shard_make_block_bname(ctx->block_num, shard_inode->gfid, - block_bname, sizeof(block_bname)); - inode_unlink(shard_inode, priv->dot_shard_inode, block_bname); - /* The following unref corresponds to the ref held by - * inode_link() at the time the shard was created or - * looked up - */ - inode_unref(shard_inode); - inode_forget(shard_inode, 0); - } - } - UNLOCK(&shard_inode->lock); - 
UNLOCK(&priv->lock); + return 0; out: - if (anon_fd) - fd_unref(anon_fd); - STACK_DESTROY(frame->root); - return 0; + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; } -int -shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode) -{ - fd_t *anon_fd = NULL; - call_frame_t *fsync_frame = NULL; +int shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - fsync_frame = create_frame(this, this->ctx->pool); - if (!fsync_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to fsync shard"); - return -1; - } + local = frame->local; - anon_fd = fd_anonymous(inode); - if (!anon_fd) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create anon fd to" - " fsync shard"); - STACK_DESTROY(fsync_frame->root); - return -1; - } + if (op_ret) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + if (!IA_ISDIR(buf->ia_type)) { + gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, + "%s already exists and " + "is not a directory. 
Please remove it from all bricks " + "and try again", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = EIO; + goto unwind; + } + + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; - STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, - anon_fd, 1, NULL); - return 0; -} +unwind: + local->post_res_handler(frame, this); + return 0; +} + +int shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t post_res_handler, + shard_internal_dir_type_t type) { + int ret = -1; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + local->post_res_handler = post_res_handler; + + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; + + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } + + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid of %s into dict", + shard_internal_dir_string(type)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } else { + free_gfid = _gf_false; + } -int 
-shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t post_res_handler) -{ - int i = -1; - uint32_t shard_idx_iter = 0; - char path[PATH_MAX] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *inode = NULL; - inode_t *res_inode = NULL; - inode_t *fsync_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - local->call_count = 0; - shard_idx_iter = local->first_block; - res_inode = local->resolver_base_inode; - if (res_inode) - gf_uuid_copy(gfid, res_inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); + STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, + xattr_req); - if ((local->op_ret < 0) || (local->resolve_not)) - goto out; + dict_unref(xattr_req); + return 0; - while (shard_idx_iter <= local->last_block) { - i++; - if (shard_idx_iter == 0) { - local->inode_list[i] = inode_ref(res_inode); - shard_idx_iter++; - continue; - } +err: + if (xattr_req) + dict_unref(xattr_req); + if (free_gfid) + GF_FREE(gfid); + post_res_handler(frame, this); + return 0; +} + +static void shard_inode_ctx_update(inode_t *inode, xlator_t *this, + dict_t *xdata, struct iatt *buf) { + int ret = 0; + uint64_t size = 0; + void *bsize = NULL; + + if (shard_inode_ctx_get_block_size(inode, this, &size)) { + /* Fresh lookup */ + ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (!ret) + size = ntoh64(*((uint64_t *)bsize)); + /* If the file is sharded, set its block size, otherwise just + * set 0. + */ - shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); - - inode = NULL; - inode = inode_resolve(this->itable, path); - if (inode) { - gf_msg_debug(this->name, 0, - "Shard %d already " - "present. gfid=%s. 
Saving inode for future.", - shard_idx_iter, uuid_utoa(inode->gfid)); - local->inode_list[i] = inode; - /* Let the ref on the inodes that are already present - * in inode table still be held so that they don't get - * forgotten by the time the fop reaches the actual - * write stage. - */ - LOCK(&priv->lock); - { - fsync_inode = __shard_update_shards_inode_list( - inode, this, res_inode, shard_idx_iter, gfid); - } - UNLOCK(&priv->lock); - shard_idx_iter++; - if (fsync_inode) - shard_initiate_evicted_inode_fsync(this, fsync_inode); - continue; - } else { - local->call_count++; - shard_idx_iter++; - } - } -out: - post_res_handler(frame, this); - return 0; + shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); + } + /* If the file is sharded, also set the remaining attributes, + * except for ia_size and ia_blocks. + */ + if (size) { + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + (void)shard_inode_ctx_invalidate(inode, this, buf); + } +} + +int shard_delete_shards(void *opaque); + +int shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); + +int shard_start_background_deletion(xlator_t *this) { + int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; + call_frame_t *cleanup_frame = NULL; + + priv = this->private; + + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; + + cleanup_frame = create_frame(this, this->ctx->pool); + if (!cleanup_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "new frame to delete shards"); + ret = -ENOMEM; + goto err; + } + + 
set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + + ret = synctask_new(this->ctx->env, shard_delete_shards, + shard_delete_shards_cbk, cleanup_frame, cleanup_frame); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, SHARD_MSG_SHARDS_DELETION_FAILED, + "failed to create task to do background " + "cleanup of shards"); + STACK_DESTROY(cleanup_frame->root); + goto err; + } + return 0; + +err: + LOCK(&priv->lock); + { priv->bg_del_state = SHARD_BG_DELETION_NONE; } + UNLOCK(&priv->lock); + return ret; } -int -shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - inode_t *inode = NULL; - shard_local_t *local = NULL; +int shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { + int ret = -1; + shard_priv_t *priv = NULL; + gf_boolean_t i_start_cleanup = _gf_false; - local = frame->local; + priv = this->private; - if ((local->fd) && (local->fd->inode)) - inode = local->fd->inode; - else if (local->loc.inode) - inode = local->loc.inode; + if (op_ret < 0) + goto unwind; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_UPDATE_FILE_SIZE_FAILED, - "Update to file size" - " xattr failed on %s", - uuid_utoa(inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } + if (IA_ISDIR(buf->ia_type)) + goto unwind; - if (shard_modify_size_and_block_count(&local->postbuf, dict)) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } -err: - local->post_update_size_handler(frame, this); - return 0; -} + /* Also, if the file is sharded, get the file size and block cnt xattr, + * and store them in the stbuf appropriately. 
+ */ -int -shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p) -{ - int ret = -1; - int64_t *size_attr = NULL; + if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(buf, xdata); - if (!size_attr_p) - goto out; + /* If this was a fresh lookup, there are two possibilities: + * 1) If the file is sharded (indicated by the presence of block size + * xattr), store this block size, along with rdev and mode in its + * inode ctx. + * 2) If the file is not sharded, store size along with rdev and mode + * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is + * already initialised to all zeroes, nothing more needs to be done. + */ - size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t); - if (!size_attr) - goto out; + (void)shard_inode_ctx_update(inode, this, xdata, buf); - size_attr[0] = hton64(size); - /* As sharding evolves, it _may_ be necessary to embed more pieces of - * information within the same xattr. So allocating slots for them in - * advance. For now, only bytes 0-63 and 128-191 which would make up the - * current size and block count respectively of the file are valid. 
- */ - size_attr[2] = hton64(block_count); + LOCK(&priv->lock); + { + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; + i_start_cleanup = _gf_true; + } + } + UNLOCK(&priv->lock); - *size_attr_p = size_attr; + if (!i_start_cleanup) + goto unwind; - ret = 0; -out: - return ret; + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { priv->first_lookup_done = _gf_false; } + UNLOCK(&priv->lock); + } + +unwind: + SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, + postparent); + return 0; } -int -shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd, - loc_t *loc, shard_post_update_size_fop_handler_t handler) -{ - int ret = -1; - int64_t *size_attr = NULL; - int64_t delta_blocks = 0; - inode_t *inode = NULL; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; +int shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) { + int ret = -1; + int32_t op_errno = ENOMEM; + uint64_t block_size = 0; + shard_local_t *local = NULL; - local = frame->local; - local->post_update_size_handler = handler; + this->itable = loc->inode->table; + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); + } - xattr_req = dict_new(); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; - } + local = mem_get0(this->local_pool); + if (!local) + goto err; - if (fd) - inode = fd->inode; - else - inode = loc->inode; + frame->local = local; - /* If both size and block count have not changed, then skip the xattrop. - */ - delta_blocks = GF_ATOMIC_GET(local->delta_blocks); - if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) { - goto out; - } + loc_copy(&local->loc, loc); - ret = shard_set_size_attrs(local->delta_size + local->hole_size, - delta_blocks, &size_attr); + local->xattr_req = xattr_req ? 
dict_ref(xattr_req) : dict_new(); + if (!local->xattr_req) + goto err; + + if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED, - "Failed to set size attrs for %s", uuid_utoa(inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict" + " value: key:%s for path %s", + GF_XATTR_SHARD_BLOCK_SIZE, loc->path); + goto err; } + } - ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4); + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set key %s into dict. gfid=%s", - GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid)); - GF_FREE(size_attr); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s for path %s.", + GF_XATTR_SHARD_FILE_SIZE, loc->path); + goto err; } + } - if (fd) - STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, fd, - GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); - else - STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, - GF_XATTROP_ADD_ARRAY64, xattr_req, NULL); - - dict_unref(xattr_req); - return 0; - -out: - if (xattr_req) - dict_unref(xattr_req); - handler(frame, this); - return 0; -} - -static inode_t * -shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode, - struct iatt *buf, shard_internal_dir_type_t type) -{ - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - char *bname = NULL; - inode_t **priv_inode = NULL; - inode_t *parent = NULL; - - priv = THIS->private; - - 
switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - bname = GF_SHARD_DIR; - priv_inode = &priv->dot_shard_inode; - parent = inode->table->root; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - bname = GF_SHARD_REMOVE_ME_DIR; - priv_inode = &priv->dot_shard_rm_inode; - parent = priv->dot_shard_inode; - break; - default: - break; - } + if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) + dict_del(xattr_req, GF_CONTENT_KEY); - linked_inode = inode_link(inode, parent, bname, buf); - inode_lookup(linked_inode); - *priv_inode = linked_inode; - return linked_inode; + STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); + return 0; +err: + shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); + return 0; } -int -shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie, +int shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - shard_local_t *local = NULL; - inode_t *linked_inode = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; - - local = frame->local; - - if (op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto out; - } + struct iatt *postparent) { + int ret = -1; + int32_t mask = SHARD_INODE_WRITE_MASK; + shard_local_t *local = NULL; + shard_inode_ctx_t ctx = { + 0, + }; + + local = frame->local; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_BASE_FILE_LOOKUP_FAILED, "Lookup on base file" + " failed : %s", + loc_gfid_utoa(&(local->loc))); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } + + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + + if (shard_inode_ctx_get_all(inode, this, &ctx)) + mask = SHARD_ALL_MASK; + + ret = 
shard_inode_ctx_set(inode, this, &local->prebuf, 0, + (mask | SHARD_MASK_REFRESH_RESET)); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, + "Failed to set inode" + " write params into inode ctx for %s", + uuid_utoa(buf->ia_gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unwind; + } + +unwind: + local->handler(frame, this); + return 0; +} + +int shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, + shard_post_fop_handler_t handler) { + int ret = -1; + shard_local_t *local = NULL; + dict_t *xattr_req = NULL; + gf_boolean_t need_refresh = _gf_false; + + local = frame->local; + local->handler = handler; + + ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, + &need_refresh); + /* By this time, inode ctx should have been created either in create, + * mknod, readdirp or lookup. If not it is a bug! + */ + if ((ret == 0) && (need_refresh == _gf_false)) { + gf_msg_debug(this->name, 0, "Skipping lookup on base file: %s" + "Serving prebuf off the inode ctx cache", + uuid_utoa(loc->gfid)); + goto out; + } + + xattr_req = dict_new(); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + + STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + + dict_unref(xattr_req); + return 0; - /* To-Do: Fix refcount increment per call to - * shard_link_internal_dir_inode(). 
- */ - linked_inode = shard_link_internal_dir_inode(local, inode, buf, type); - shard_inode_ctx_mark_dir_refreshed(linked_inode, this); out: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; + if (xattr_req) + dict_unref(xattr_req); + handler(frame, this); + return 0; } -int -shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this, - shard_internal_dir_type_t type) -{ - loc_t loc = { - 0, - }; - inode_t *inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - uuid_t gfid = { - 0, - }; +int shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - local = frame->local; - priv = this->private; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(gfid, priv->dot_shard_gfid); - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); - break; - default: - break; - } + local = frame->local; - inode = inode_find(this->itable, gfid); + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); - if (!shard_inode_ctx_needs_lookup(inode, this)) { - local->op_ret = 0; - goto out; - } + SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); + return 0; +} - /* Plain assignment because the ref is already taken above through - * call to inode_find() - */ - loc.inode = inode; - gf_uuid_copy(loc.gfid, gfid); +int shard_post_stat_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc, - NULL); - loc_wipe(&loc); + local = frame->local; - return 0; + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, + SHARD_LOOKUP_MASK); -out: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; + SHARD_STACK_UNWIND(stat, 
frame, local->op_ret, local->op_errno, + &local->prebuf, local->xattr_rsp); + return 0; } -int -shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - inode_t *link_inode = NULL; - shard_local_t *local = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; +int shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) { + inode_t *inode = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, + "stat failed: %s", local->fd ? uuid_utoa(local->fd->inode->gfid) + : uuid_utoa((local->loc.inode)->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - if (!IA_ISDIR(buf->ia_type)) { - gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR, - "%s already exists and " - "is not a directory. 
Please remove it from all bricks " - "and try again", - shard_internal_dir_string(type)); - local->op_ret = -1; - local->op_errno = EIO; - goto unwind; - } + local->prebuf = *buf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + local->xattr_rsp = dict_ref(xdata); - link_inode = shard_link_internal_dir_inode(local, inode, buf, type); - if (link_inode != inode) { - shard_refresh_internal_dir(frame, this, type); - } else { - shard_inode_ctx_mark_dir_refreshed(link_inode, this); - shard_common_resolve_shards(frame, this, local->post_res_handler); - } - return 0; + if (local->loc.inode) + inode = local->loc.inode; + else + inode = local->fd->inode; + + shard_inode_ctx_invalidate(inode, this, &local->prebuf); unwind: - local->post_res_handler(frame, this); - return 0; + local->handler(frame, this); + return 0; } -int -shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t post_res_handler, - shard_internal_dir_type_t type) -{ - int ret = -1; - dict_t *xattr_req = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - uuid_t *gfid = NULL; - loc_t *loc = NULL; - gf_boolean_t free_gfid = _gf_true; - - local = frame->local; - priv = this->private; - local->post_res_handler = post_res_handler; - - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); - if (!gfid) - goto err; - - xattr_req = dict_new(); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(*gfid, priv->dot_shard_gfid); - loc = &local->dot_shard_loc; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); - loc = &local->dot_shard_rm_loc; - break; - default: - bzero(*gfid, sizeof(uuid_t)); - break; - } +int shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { + int ret = -1; + uint64_t 
block_size = 0; + shard_local_t *local = NULL; - ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set gfid of %s into dict", - shard_internal_dir_string(type)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } else { - free_gfid = _gf_false; - } + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; + } - STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc, - xattr_req); + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - dict_unref(xattr_req); + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + frame->local = local; + + local->handler = shard_post_stat_handler; + loc_copy(&local->loc, loc); + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); + + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); + return 0; err: - if (xattr_req) - dict_unref(xattr_req); - if (free_gfid) - GF_FREE(gfid); - post_res_handler(frame, this); - return 0; + shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); + return 0; } -static void -shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata, - struct iatt *buf) -{ - int ret = 0; - uint64_t size = 0; - void *bsize = NULL; - - if (shard_inode_ctx_get_block_size(inode, this, &size)) { - /* Fresh lookup */ - ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (!ret) - size = ntoh64(*((uint64_t *)bsize)); - /* If the file is sharded, set its block size, otherwise just - * set 0. - */ +int shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE); - } - /* If the file is sharded, also set the remaining attributes, - * except for ia_size and ia_blocks. 
- */ - if (size) { - shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); - (void)shard_inode_ctx_invalidate(inode, this, buf); - } -} + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } -int -shard_delete_shards(void *opaque); + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } -int -shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data); + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; + } -int -shard_start_background_deletion(xlator_t *this) -{ - int ret = 0; - gf_boolean_t i_cleanup = _gf_true; - shard_priv_t *priv = NULL; - call_frame_t *cleanup_frame = NULL; + if (!this->itable) + this->itable = fd->inode->table; - priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; - LOCK(&priv->lock); - { - switch (priv->bg_del_state) { - case SHARD_BG_DELETION_NONE: - i_cleanup = _gf_true; - priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; - break; - case SHARD_BG_DELETION_LAUNCHING: - i_cleanup = _gf_false; - break; - case SHARD_BG_DELETION_IN_PROGRESS: - priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; - i_cleanup = _gf_false; - break; - default: - break; - } - } - UNLOCK(&priv->lock); - if (!i_cleanup) - return 0; - - cleanup_frame = create_frame(this, this->ctx->pool); - if (!cleanup_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create " - "new frame to delete shards"); - ret = -ENOMEM; - goto err; - } + frame->local = local; - set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, 
cleanup_frame->root); + local->handler = shard_post_fstat_handler; + local->fd = fd_ref(fd); + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - ret = synctask_new(this->ctx->env, shard_delete_shards, - shard_delete_shards_cbk, cleanup_frame, cleanup_frame); - if (ret < 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, - SHARD_MSG_SHARDS_DELETION_FAILED, - "failed to create task to do background " - "cleanup of shards"); - STACK_DESTROY(cleanup_frame->root); - goto err; - } - return 0; + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); + return 0; err: - LOCK(&priv->lock); - { - priv->bg_del_state = SHARD_BG_DELETION_NONE; - } - UNLOCK(&priv->lock); - return ret; + shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); + return 0; } -int -shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - int ret = -1; - shard_priv_t *priv = NULL; - gf_boolean_t i_start_cleanup = _gf_false; +int shard_post_update_size_truncate_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - priv = this->private; + local = frame->local; - if (op_ret < 0) - goto unwind; + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); + else + SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, NULL); + return 0; +} - if (IA_ISDIR(buf->ia_type)) - goto unwind; +int shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { + inode_t *inode = NULL; + int64_t delta_blocks = 0; 
+ shard_local_t *local = NULL; - /* Also, if the file is sharded, get the file size and block cnt xattr, - * and store them in the stbuf appropriately. - */ + local = frame->local; - if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count(buf, xdata); - - /* If this was a fresh lookup, there are two possibilities: - * 1) If the file is sharded (indicated by the presence of block size - * xattr), store this block size, along with rdev and mode in its - * inode ctx. - * 2) If the file is not sharded, store size along with rdev and mode - * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is - * already initialised to all zeroes, nothing more needs to be done. - */ + SHARD_UNSET_ROOT_FS_ID(frame, local); - (void)shard_inode_ctx_update(inode, this, xdata, buf); + inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode : local->fd->inode; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, "truncate on last" + " shard failed : %s", + uuid_utoa(inode->gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + + local->postbuf.ia_size = local->offset; + /* Let the delta be negative. 
We want xattrop to do subtraction */ + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, + postbuf->ia_blocks - prebuf->ia_blocks); + GF_ASSERT(delta_blocks <= 0); + local->postbuf.ia_blocks += delta_blocks; + local->hole_size = 0; + + shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +} + +int shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, + inode_t *inode) { + size_t last_shard_size_after = 0; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + + local = frame->local; + + /* A NULL inode could be due to the fact that the last shard which + * needs to be truncated does not exist due to it lying in a hole + * region. So the only thing left to do in that case would be an + * update to file size xattr. + */ + if (!inode) { + gf_msg_debug(this->name, 0, + "Last shard to be truncated absent" + " in backend: %s. 
Directly proceeding to update " + "file size", + uuid_utoa(inode->gfid)); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } - LOCK(&priv->lock); - { - if (priv->first_lookup_done == _gf_false) { - priv->first_lookup_done = _gf_true; - i_start_cleanup = _gf_true; - } - } - UNLOCK(&priv->lock); + SHARD_SET_ROOT_FS_ID(frame, local); - if (!i_start_cleanup) - goto unwind; + loc.inode = inode_ref(inode); + gf_uuid_copy(loc.gfid, inode->gfid); - ret = shard_start_background_deletion(this); - if (ret < 0) { - LOCK(&priv->lock); - { - priv->first_lookup_done = _gf_false; - } - UNLOCK(&priv->lock); - } + last_shard_size_after = (local->offset % local->block_size); -unwind: - SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata, - postparent); - return 0; + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, + NULL); + loc_wipe(&loc); + return 0; } -int -shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - int ret = -1; - int32_t op_errno = ENOMEM; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - this->itable = loc->inode->table; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - SHARD_ENTRY_FOP_CHECK(loc, op_errno, err); - } +void shard_unlink_block_inode(shard_local_t *local, int shard_block_num); - local = mem_get0(this->local_pool); - if (!local) - goto err; +int shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { + int ret = 0; + int call_count = 0; + int shard_block_num = (long)cookie; + uint64_t block_count = 0; + shard_local_t *local = NULL; - frame->local = local; + local = frame->local; - loc_copy(&local->loc, loc); + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } + ret = 
dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); + if (!ret) { + GF_ATOMIC_SUB(local->delta_blocks, block_count); + } else { + /* dict_get failed possibly due to a heterogeneous cluster? */ + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get key %s from dict during truncate of gfid %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + } + + shard_unlink_block_inode(local, shard_block_num); +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + shard_truncate_last_shard(frame, this, local->inode_list[0]); + } + return 0; +} + +int shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) { + int i = 1; + int ret = -1; + int call_count = 0; + uint32_t cur_block = 0; + uint32_t last_block = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xdata_req = NULL; + + local = frame->local; + priv = this->private; + + cur_block = local->first_block + 1; + last_block = local->last_block; + + /* Determine call count */ + for (i = 1; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + call_count++; + } + + if (!call_count) { + /* Call count = 0 implies that all of the shards that need to be + * unlinked do not exist. So shard xlator would now proceed to + * do the final truncate + size updates. + */ + gf_msg_debug(this->name, 0, "Shards to be unlinked as part of " + "truncate absent in backend: %s. 
Directly " + "proceeding to update file size", + uuid_utoa(inode->gfid)); + local->postbuf.ia_size = local->offset; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; + local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->hole_size = 0; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } - local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new(); - if (!local->xattr_req) - goto err; + local->call_count = call_count; + i = 1; + xdata_req = dict_new(); + if (!xdata_req) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } + ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set key %s into dict during truncate of %s", + GF_GET_FILE_BLOCK_COUNT, + uuid_utoa(local->resolver_base_inode->gfid)); + dict_unref(xdata_req); + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } - if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) { - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict" - " value: key:%s for path %s", - GF_XATTR_SHARD_BLOCK_SIZE, loc->path); - goto err; - } + SHARD_SET_ROOT_FS_ID(frame, local); + while (cur_block <= last_block) { + if (!local->inode_list[i]) { + cur_block++; + i++; + continue; + } + if (wind_failed) { + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, + NULL, NULL, NULL); + goto next; } - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, - 8 * 4); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s for path %s.", - GF_XATTR_SHARD_FILE_SIZE, loc->path); - goto 
err; - } + shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s. Base file gfid = %s", + bname, uuid_utoa(inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, ENOMEM, + NULL, NULL, NULL); + goto next; } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[i]); - if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY))) - dict_del(xattr_req, GF_CONTENT_KEY); + STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, (void *)(long)cur_block, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, &loc, + 0, xdata_req); + loc_wipe(&loc); + next: + i++; + cur_block++; + if (!--call_count) + break; + } + dict_unref(xdata_req); + return 0; +} - STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req); - return 0; -err: - shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno); +int shard_truncate_do(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; + + local = frame->local; + + if (local->num_blocks == 1) { + /* This means that there are no shards to be unlinked. + * The fop boils down to truncating the last shard, updating + * the size and unwinding. 
+ */ + shard_truncate_last_shard(frame, this, local->inode_list[0]); return 0; + } else { + shard_truncate_htol(frame, this, local->loc.inode); + } + return 0; } -int -shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - int ret = -1; - int32_t mask = SHARD_INODE_WRITE_MASK; - shard_local_t *local = NULL; - shard_inode_ctx_t ctx = { - 0, - }; - - local = frame->local; +int shard_post_lookup_shards_truncate_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_BASE_FILE_LOOKUP_FAILED, - "Lookup on base file" - " failed : %s", - loc_gfid_utoa(&(local->loc))); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + local = frame->local; - local->prebuf = *buf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } + + shard_truncate_do(frame, this); + return 0; +} + +void shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, + struct iatt *buf) { + int list_index = 0; + char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *linked_inode = NULL; + xlator_t *this = NULL; + inode_t *fsync_inode = NULL; + shard_priv_t *priv = NULL; + inode_t *base_inode = NULL; + + this = THIS; + priv = this->private; + if (local->loc.inode) { + gf_uuid_copy(gfid, local->loc.inode->gfid); + base_inode = local->loc.inode; + } else if (local->resolver_base_inode) { + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + base_inode = local->resolver_base_inode; + } else { + gf_uuid_copy(gfid, local->base_gfid); + } + + shard_make_block_bname(block_num, gfid, block_bname, 
sizeof(block_bname)); + + shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); + linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); + inode_lookup(linked_inode); + list_index = block_num - local->first_block; + local->inode_list[list_index] = linked_inode; + + LOCK(&priv->lock); + { + fsync_inode = __shard_update_shards_inode_list(linked_inode, this, + base_inode, block_num, gfid); + } + UNLOCK(&priv->lock); + if (fsync_inode) + shard_initiate_evicted_inode_fsync(this, fsync_inode); +} + +int shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { + int call_count = 0; + int shard_block_num = (long)cookie; + uuid_t gfid = { + 0, + }; + shard_local_t *local = NULL; + + local = frame->local; + if (local->resolver_base_inode) + gf_uuid_copy(gfid, local->resolver_base_inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + if (op_ret < 0) { + /* Ignore absence of shards in the backend in truncate fop. */ + switch (local->fop) { + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + case GF_FOP_RENAME: + case GF_FOP_UNLINK: + if (op_errno == ENOENT) + goto done; + break; + case GF_FOP_WRITE: + case GF_FOP_READ: + case GF_FOP_ZEROFILL: + case GF_FOP_DISCARD: + case GF_FOP_FALLOCATE: + if ((!local->first_lookup_done) && (op_errno == ENOENT)) { + LOCK(&frame->lock); + { local->create_count++; } + UNLOCK(&frame->lock); + goto done; + } + break; + default: + break; } - if (shard_inode_ctx_get_all(inode, this, &ctx)) - mask = SHARD_ALL_MASK; + /* else */ + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_LOOKUP_SHARD_FAILED, + "Lookup on shard %d " + "failed. 
Base file gfid = %s", + shard_block_num, uuid_utoa(gfid)); + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } - ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0, - (mask | SHARD_MASK_REFRESH_RESET)); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0, - "Failed to set inode" - " write params into inode ctx for %s", - uuid_utoa(buf->ia_gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } + shard_link_block_inode(local, shard_block_num, inode, buf); -unwind: - local->handler(frame, this); +done: + if (local->lookup_shards_barriered) { + syncbarrier_wake(&local->barrier); return 0; + } else { + call_count = shard_call_count_return(frame); + if (call_count == 0) { + if (!local->first_lookup_done) + local->first_lookup_done = _gf_true; + local->pls_fop_handler(frame, this); + } + } + return 0; } -int -shard_lookup_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc, - shard_post_fop_handler_t handler) -{ - int ret = -1; - shard_local_t *local = NULL; - dict_t *xattr_req = NULL; - gf_boolean_t need_refresh = _gf_false; +dict_t *shard_create_gfid_dict(dict_t *dict) { + int ret = 0; + dict_t *new = NULL; + unsigned char *gfid = NULL; - local = frame->local; - local->handler = handler; + new = dict_copy_with_ref(dict, NULL); + if (!new) + return NULL; - ret = shard_inode_ctx_fill_iatt_from_cache(loc->inode, this, &local->prebuf, - &need_refresh); - /* By this time, inode ctx should have been created either in create, - * mknod, readdirp or lookup. If not it is a bug! 
- */ - if ((ret == 0) && (need_refresh == _gf_false)) { - gf_msg_debug(this->name, 0, - "Skipping lookup on base file: %s" - "Serving prebuf off the inode ctx cache", - uuid_utoa(loc->gfid)); - goto out; + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); + if (!gfid) { + ret = -1; + goto out; + } + + gf_uuid_generate(gfid); + + ret = dict_set_gfuuid(new, "gfid-req", gfid, false); + +out: + if (ret) { + dict_unref(new); + new = NULL; + GF_FREE(gfid); + } + + return new; +} + +int shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, + inode_t *inode, + shard_post_lookup_shards_fop_handler_t handler) { + int i = 0; + int ret = 0; + int count = 0; + int call_count = 0; + int32_t shard_idx_iter = 0; + int last_block = 0; + char path[PATH_MAX] = { + 0, + }; + char *bname = NULL; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + gf_boolean_t wind_failed = _gf_false; + dict_t *xattr_req = NULL; + + priv = this->private; + local = frame->local; + count = call_count = local->call_count; + shard_idx_iter = local->first_block; + last_block = local->last_block; + local->pls_fop_handler = handler; + if (local->lookup_shards_barriered) + local->barrier.waitfor = local->call_count; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + i++; + shard_idx_iter++; + continue; + } + + if (wind_failed) { + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL); + goto next; + } + + shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); + + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, 
GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL); + goto next; } - xattr_req = dict_new(); + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + xattr_req = shard_create_gfid_dict(local->xattr_req); if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto out; + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, this, + -1, ENOMEM, NULL, NULL, NULL, NULL); + goto next; + } + + STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + next: + shard_idx_iter++; + i++; + + if (!--call_count) + break; + } + if (local->lookup_shards_barriered) { + syncbarrier_wait(&local->barrier, count); + local->pls_fop_handler(frame, this); + } + return 0; +} + +int shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; + + local = frame->local; + + if (local->op_ret < 0) { + if (local->op_errno == ENOENT) { + /* If lookup on /.shard fails with ENOENT, it means that + * the file was 0-byte in size but truncated sometime in + * the past to a higher size which is reflected in the + * size xattr, and now being truncated to a lower size. + * In this case, the only thing that needs to be done is + * to update the size xattr of the file and unwind. 
+ */ + local->first_block = local->last_block = 0; + local->num_blocks = 1; + local->call_count = 0; + local->op_ret = 0; + local->postbuf.ia_size = local->offset; + shard_update_file_size(frame, this, local->fd, &local->loc, + shard_post_update_size_truncate_handler); + return 0; + } else { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } + } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, loc->gfid, local, out); + if (!local->call_count) + shard_truncate_do(frame, this); + else + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_truncate_handler); + + return 0; +} + +int shard_truncate_begin(call_frame_t *frame, xlator_t *this) { + int ret = 0; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + /* First participant block here is the lowest numbered block that would + * hold the last byte of the file post successful truncation. + * Last participant block is the block that contains the last byte in + * the current state of the file. + * If (first block == last_block): + * then that means that the file only needs truncation of the + * first (or last since both are same) block. + * Else + * if (new_size % block_size == 0) + * then that means there is no truncate to be done with + * only shards from first_block + 1 through the last + * block needing to be unlinked. + * else + * both truncate of the first block and unlink of the + * remaining shards until end of file is required. + */ + local->first_block = + (local->offset == 0) ? 0 : get_lowest_block(local->offset - 1, + local->block_size); + local->last_block = + get_highest_block(0, local->prebuf.ia_size, local->block_size); + + local->num_blocks = local->last_block - local->first_block + 1; + local->resolver_base_inode = + (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode : local->fd->inode; + + if ((local->first_block == 0) && (local->num_blocks == 1)) { + if (local->fop == GF_FOP_TRUNCATE) + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, &local->loc, local->offset, + local->xattr_req); + else + STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, local->fd, local->offset, + local->xattr_req); + return 0; + } - STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; - dict_unref(xattr_req); - return 0; + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = + shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, shard_post_resolve_truncate_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_truncate_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; -out: - if (xattr_req) - dict_unref(xattr_req); - handler(frame, this); - return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } -int -shard_post_fstat_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; + struct iatt tmp_stbuf = { + 0, + }; - local = frame->local; - - if (local->op_ret >= 0) - shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + local = frame->local; - SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); + if (local->op_ret < 0) { + 
shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } + + local->postbuf = tmp_stbuf = local->prebuf; + + if (local->prebuf.ia_size == local->offset) { + /* If the file size is same as requested size, unwind the call + * immediately. + */ + if (local->fop == GF_FOP_TRUNCATE) + SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, &local->postbuf, + NULL); + else + SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, + &local->postbuf, NULL); + } else if (local->offset > local->prebuf.ia_size) { + /* If the truncate is from a lower to a higher size, set the + * new size xattr and unwind. + */ + local->hole_size = local->offset - local->prebuf.ia_size; + local->delta_size = 0; + GF_ATOMIC_INIT(local->delta_blocks, 0); + local->postbuf.ia_size = local->offset; + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_update_file_size(frame, this, NULL, &local->loc, + shard_post_update_size_truncate_handler); + } else { + /* ... else + * i. unlink all shards that need to be unlinked. + * ii. truncate the last of the shards. + * iii. update the new size using setxattr. + * and unwind the fop. + */ + local->hole_size = 0; + local->delta_size = (local->offset - local->prebuf.ia_size); + GF_ATOMIC_INIT(local->delta_blocks, 0); + tmp_stbuf.ia_size = local->offset; + shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, + SHARD_INODE_WRITE_MASK); + shard_truncate_begin(frame, this); + } + return 0; } -int -shard_post_stat_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +/* TO-DO: + * Fix updates to size and block count with racing write(s) and truncate(s). 
+ */ - local = frame->local; +int shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - if (local->op_ret >= 0) - shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0, - SHARD_LOOKUP_MASK); + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, - &local->prebuf, local->xattr_rsp); + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; -} + } -int -shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) -{ - inode_t *inode = NULL; - shard_local_t *local = NULL; + if (!this->itable) + this->itable = loc->inode->table; - local = frame->local; + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + loc_copy(&local->loc, loc); + local->offset = offset; + local->block_size = block_size; + local->fop = GF_FOP_TRUNCATE; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->resolver_base_inode = loc->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); + return 0; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED, - "stat failed: %s", - local->fd ? 
uuid_utoa(local->fd->inode->gfid) - : uuid_utoa((local->loc.inode)->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } +err: + shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); + return 0; +} + +int shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->offset = offset; + local->block_size = block_size; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FTRUNCATE; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local->resolver_base_inode = fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_truncate_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM); + return 0; +} - local->prebuf = *buf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - local->xattr_rsp = dict_ref(xdata); +int shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + int ret = -1; + shard_local_t *local = NULL; - if (local->loc.inode) - inode = local->loc.inode; - else - inode = local->fd->inode; + local = frame->local; - shard_inode_ctx_invalidate(inode, this, &local->prebuf); + if (op_ret == -1) + goto unwind; + + ret = + shard_inode_ctx_set(inode, this, buf, local->block_size, SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); unwind: - local->handler(frame, this); - return 0; -} + SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); -int -shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; + return 0; +} - if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { - STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; - } +int shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, 
dict_t *xdata) { + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, xdata); - return 0; - } + frame->local = local; + local->block_size = priv->block_size; + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } - local = mem_get0(this->local_pool); - if (!local) - goto err; + STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); + return 0; +} - frame->local = local; +int32_t shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + shard_local_t *local = NULL; - local->handler = shard_post_stat_handler; - loc_copy(&local->loc, loc); - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + local = frame->local; + if (op_ret < 0) + goto err; - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, - local, err); + shard_inode_ctx_set(inode, this, buf, 0, SHARD_MASK_NLINK | SHARD_MASK_TIMES); + buf->ia_size = local->prebuf.ia_size; + buf->ia_blocks = local->prebuf.ia_blocks; - STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc, local->xattr_req); - return 0; + SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; err: - shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); + return 0; } -int -shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { - STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; - } + local = frame->local; - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } + if (local->op_ret < 0) { + SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, NULL, + NULL, NULL, NULL); + return 0; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, xdata); - return 0; - } + STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, + local->xattr_req); + return 0; +} - if 
(!this->itable) - this->itable = fd->inode->table; +int32_t shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - local = mem_get0(this->local_pool); - if (!local) - goto err; + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } - frame->local = local; + if (!block_size) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; + } - local->handler = shard_post_fstat_handler; - local->fd = fd_ref(fd); - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + if (!this->itable) + this->itable = oldloc->inode->table; - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); + local = mem_get0(this->local_pool); + if (!local) + goto err; - STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req); - return 0; + frame->local = local; + + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_link_handler); + return 0; err: - shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); + return 0; } -int -shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); - local = frame->local; +int shard_post_lookup_shards_unlink_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, NULL); - else - SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, NULL); + local = frame->local; + + if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { + gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, + "failed to delete shards of %s", + uuid_utoa(local->resolver_base_inode->gfid)); return 0; -} + } + local->op_ret = 0; + local->op_errno = 0; -int -shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - inode_t *inode = NULL; - int64_t delta_blocks = 0; - shard_local_t *local = NULL; + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + return 0; +} - local = frame->local; +int shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - SHARD_UNSET_ROOT_FS_ID(frame, local); + local = frame->local; + local->lookup_shards_barriered = _gf_true; - inode = (local->fop == GF_FOP_TRUNCATE) ? 
local->loc.inode - : local->fd->inode; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED, - "truncate on last" - " shard failed : %s", - uuid_utoa(inode->gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } + if (!local->call_count) + shard_unlink_shards_do(frame, this, local->resolver_base_inode); + else + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_unlink_handler); + return 0; +} + +void shard_unlink_block_inode(shard_local_t *local, int shard_block_num) { + char block_bname[256] = { + 0, + }; + uuid_t gfid = { + 0, + }; + inode_t *inode = NULL; + inode_t *base_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + int unref_base_inode = 0; + int unref_shard_inode = 0; + + this = THIS; + priv = this->private; + + inode = local->inode_list[shard_block_num - local->first_block]; + shard_inode_ctx_get(inode, this, &ctx); + base_inode = ctx->base_inode; + if (base_inode) + gf_uuid_copy(gfid, base_inode->gfid); + else + gf_uuid_copy(gfid, ctx->base_gfid); + shard_make_block_bname(shard_block_num, gfid, block_bname, + sizeof(block_bname)); + + LOCK(&priv->lock); + if (base_inode) + LOCK(&base_inode->lock); + LOCK(&inode->lock); + { + __shard_inode_ctx_get(inode, this, &ctx); + if (!list_empty(&ctx->ilist)) { + list_del_init(&ctx->ilist); + priv->inode_count--; + unref_base_inode++; + unref_shard_inode++; + GF_ASSERT(priv->inode_count >= 0); + } + if (ctx->fsync_needed) { + unref_base_inode++; + unref_shard_inode++; + list_del_init(&ctx->to_fsync_list); + if (base_inode) { + __shard_inode_ctx_get(base_inode, this, &base_ictx); + base_ictx->fsync_count--; + } + } + } + UNLOCK(&inode->lock); + if (base_inode) + UNLOCK(&base_inode->lock); - local->postbuf.ia_size = local->offset; - /* Let the delta be negative. 
We want xattrop to do subtraction */ - local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - delta_blocks = GF_ATOMIC_ADD(local->delta_blocks, - postbuf->ia_blocks - prebuf->ia_blocks); - GF_ASSERT(delta_blocks <= 0); - local->postbuf.ia_blocks += delta_blocks; - local->hole_size = 0; + inode_unlink(inode, priv->dot_shard_inode, block_bname); + inode_ref_reduce_by_n(inode, unref_shard_inode); + inode_forget(inode, 0); - shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; + if (base_inode && unref_base_inode) + inode_ref_reduce_by_n(base_inode, unref_base_inode); + UNLOCK(&priv->lock); } -int -shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode) -{ - size_t last_shard_size_after = 0; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; +int shard_rename_cbk(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - /* A NULL inode could be due to the fact that the last shard which - * needs to be truncated does not exist due to it lying in a hole - * region. So the only thing left to do in that case would be an - * update to file size xattr. - */ - if (!inode) { - gf_msg_debug(this->name, 0, - "Last shard to be truncated absent" - " in backend: %s. 
Directly proceeding to update " - "file size", - uuid_utoa(inode->gfid)); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } + SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->preoldparent, + &local->postoldparent, &local->prenewparent, + &local->postnewparent, local->xattr_rsp); + return 0; +} - SHARD_SET_ROOT_FS_ID(frame, local); +int32_t shard_unlink_cbk(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = frame->local; - loc.inode = inode_ref(inode); - gf_uuid_copy(loc.gfid, inode->gfid); + SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preoldparent, &local->postoldparent, + local->xattr_rsp); + return 0; +} - last_shard_size_after = (local->offset % local->block_size); +int shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { + int shard_block_num = (long)cookie; + shard_local_t *local = NULL; - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after, - NULL); - loc_wipe(&loc); - return 0; -} + local = frame->local; -void -shard_unlink_block_inode(shard_local_t *local, int shard_block_num); + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto done; + } -int -shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int ret = 0; - int call_count = 0; - int shard_block_num = (long)cookie; - uint64_t block_count = 0; - shard_local_t *local = NULL; + shard_unlink_block_inode(local, shard_block_num); +done: + syncbarrier_wake(&local->barrier); + return 0; +} + +int shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, + inode_t *inode) { + int i = 0; + int 
ret = -1; + int count = 0; + uint32_t cur_block = 0; + uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ + char *bname = NULL; + char path[PATH_MAX] = { + 0, + }; + uuid_t gfid = { + 0, + }; + loc_t loc = { + 0, + }; + gf_boolean_t wind_failed = _gf_false; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; + + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + gf_uuid_copy(gfid, local->base_gfid); + + for (i = 0; i < local->num_blocks; i++) { + if (!local->inode_list[i]) + continue; + count++; + } + + if (!count) { + /* callcount = 0 implies that all of the shards that need to be + * unlinked are non-existent (in other words the file is full of + * holes). + */ + gf_msg_debug(this->name, 0, "All shards that need to be " + "unlinked are non-existent: %s", + uuid_utoa(gfid)); + return 0; + } - local = frame->local; + SHARD_SET_ROOT_FS_ID(frame, local); + local->barrier.waitfor = count; + cur_block = cur_block_idx + local->first_block; - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } - ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count); - if (!ret) { - GF_ATOMIC_SUB(local->delta_blocks, block_count); - } else { - /* dict_get failed possibly due to a heterogeneous cluster? 
*/ - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get key %s from dict during truncate of gfid %s", - GF_GET_FILE_BLOCK_COUNT, - uuid_utoa(local->resolver_base_inode->gfid)); - } - - shard_unlink_block_inode(local, shard_block_num); -done: - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - shard_truncate_last_shard(frame, this, local->inode_list[0]); - } - return 0; -} - -int -shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode) -{ - int i = 1; - int ret = -1; - int call_count = 0; - uint32_t cur_block = 0; - uint32_t last_block = 0; - char path[PATH_MAX] = { - 0, - }; - char *bname = NULL; - loc_t loc = { - 0, - }; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - dict_t *xdata_req = NULL; - - local = frame->local; - priv = this->private; - - cur_block = local->first_block + 1; - last_block = local->last_block; - - /* Determine call count */ - for (i = 1; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - call_count++; - } + while (cur_block_idx < local->num_blocks) { + if (!local->inode_list[cur_block_idx]) + goto next; - if (!call_count) { - /* Call count = 0 implies that all of the shards that need to be - * unlinked do not exist. So shard xlator would now proceed to - * do the final truncate + size updates. - */ - gf_msg_debug(this->name, 0, - "Shards to be unlinked as part of " - "truncate absent in backend: %s. 
Directly " - "proceeding to update file size", - uuid_utoa(inode->gfid)); - local->postbuf.ia_size = local->offset; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; - local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size; - GF_ATOMIC_INIT(local->delta_blocks, 0); - local->hole_size = 0; - shard_update_file_size(frame, this, local->fd, &local->loc, - shard_post_update_size_truncate_handler); - return 0; + if (wind_failed) { + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; } - local->call_count = call_count; - i = 1; - xdata_req = dict_new(); - if (!xdata_req) { - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; - } - ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set key %s into dict during truncate of %s", - GF_GET_FILE_BLOCK_COUNT, - uuid_utoa(local->resolver_base_inode->gfid)); - dict_unref(xdata_req); - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; + shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); + bname = strrchr(path, '/') + 1; + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on %s, base file gfid = %s", + bname, uuid_utoa(gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + loc_wipe(&loc); + wind_failed = _gf_true; + shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; } - SHARD_SET_ROOT_FS_ID(frame, local); - while (cur_block <= last_block) { - if (!local->inode_list[i]) { - cur_block++; - i++; - continue; - } - if (wind_failed) { - shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - 
shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path)); - bname = strrchr(path, '/') + 1; - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s. Base file gfid = %s", - bname, uuid_utoa(inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - loc.inode = inode_ref(local->inode_list[i]); - - STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk, - (void *)(long)cur_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req); - loc_wipe(&loc); - next: - i++; - cur_block++; - if (!--call_count) - break; - } - dict_unref(xdata_req); - return 0; -} + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + loc.inode = inode_ref(local->inode_list[cur_block_idx]); -int -shard_truncate_do(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; + STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, + (void *)(long)cur_block, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, + local->xattr_req); + loc_wipe(&loc); + next: + cur_block++; + cur_block_idx++; + } + syncbarrier_wait(&local->barrier, count); + SHARD_UNSET_ROOT_FS_ID(frame, local); + return 0; +} + +int shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, + int now, int first_block, + gf_dirent_t *entry) { + int i = 0; + int ret = 0; + shard_local_t *local = NULL; + uuid_t gfid = { + 0, + }; + + local = cleanup_frame->local; + + local->inode_list = GF_CALLOC(now, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) + return -ENOMEM; + + local->first_block = first_block; + local->last_block = 
first_block + now - 1; + local->num_blocks = now; + gf_uuid_parse(entry->d_name, gfid); + gf_uuid_copy(local->base_gfid, gfid); + local->resolver_base_inode = inode_find(this->itable, gfid); + local->call_count = 0; + ret = syncbarrier_init(&local->barrier); + if (ret) { + GF_FREE(local->inode_list); + local->inode_list = NULL; + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + return -errno; + } + shard_common_resolve_shards(cleanup_frame, this, + shard_post_resolve_unlink_handler); + + for (i = 0; i < local->num_blocks; i++) { + if (local->inode_list[i]) + inode_unref(local->inode_list[i]); + } + GF_FREE(local->inode_list); + local->inode_list = NULL; + if (local->op_ret) + ret = -local->op_errno; + syncbarrier_destroy(&local->barrier); + inode_unref(local->resolver_base_inode); + local->resolver_base_inode = NULL; + STACK_RESET(cleanup_frame->root); + return ret; +} + +int __shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) { + int ret = 0; + int shard_count = 0; + int first_block = 0; + int now = 0; + uint64_t size = 0; + uint64_t block_size = 0; + uint64_t size_array[4] = { + 0, + }; + void *bsize = NULL; + void *size_attr = NULL; + dict_t *xattr_rsp = NULL; + loc_t loc = { + 0, + }; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = cleanup_frame->local; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset dict"); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + ret = -ENOMEM; + goto err; + } + + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, 
SHARD_MSG_DICT_OP_FAILED, + "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + ret = -ENOMEM; + goto err; + } + + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, + &xattr_rsp); + if (ret) + goto err; + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); + goto err; + } + block_size = ntoh64(*((uint64_t *)bsize)); + + ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); + goto err; + } + + memcpy(size_array, size_attr, sizeof(size_array)); + size = ntoh64(size_array[0]); + + shard_count = (size / block_size) - 1; + if (shard_count < 0) { + gf_msg_debug(this->name, 0, "Size of %s hasn't grown beyond " + "its shard-block-size. Nothing to delete. " + "Returning", + entry->d_name); + /* File size < shard-block-size, so nothing to delete */ + ret = 0; + goto delete_marker; + } + if ((size % block_size) > 0) + shard_count++; + + if (shard_count == 0) { + gf_msg_debug(this->name, 0, "Size of %s is exactly equal to " + "its shard-block-size. Nothing to delete. 
" + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } + gf_msg_debug(this->name, 0, + "base file = %s, " + "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 ", " + "shard_count=%d", + entry->d_name, block_size, size, shard_count); + + /* Perform a gfid-based lookup to see if gfid corresponding to marker + * file's base name exists. + */ + loc_wipe(&loc); + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + gf_uuid_parse(entry->d_name, loc.gfid); + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (!ret) { + gf_msg_debug(this->name, 0, "Base shard corresponding to gfid " + "%s is present. Skipping shard deletion. " + "Returning", + entry->d_name); + ret = 0; + goto delete_marker; + } - local = frame->local; + first_block = 1; - if (local->num_blocks == 1) { - /* This means that there are no shards to be unlinked. - * The fop boils down to truncating the last shard, updating - * the size and unwinding. - */ - shard_truncate_last_shard(frame, this, local->inode_list[0]); - return 0; + while (shard_count) { + if (shard_count < local->deletion_rate) { + now = shard_count; + shard_count = 0; } else { - shard_truncate_htol(frame, this, local->loc.inode); - } - return 0; -} - -int -shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; + now = local->deletion_rate; + shard_count -= local->deletion_rate; } - shard_truncate_do(frame, this); - return 0; -} + gf_msg_debug(this->name, 0, "deleting %d shards starting from " + "block %d of gfid %s", + now, first_block, entry->d_name); + ret = shard_regulated_shards_deletion(cleanup_frame, this, now, first_block, + entry); + if (ret) + goto err; + first_block += now; + } -void -shard_link_block_inode(shard_local_t *local, int block_num, 
inode_t *inode, - struct iatt *buf) -{ - int list_index = 0; - char block_bname[256] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *linked_inode = NULL; - xlator_t *this = NULL; - inode_t *fsync_inode = NULL; - shard_priv_t *priv = NULL; - inode_t *base_inode = NULL; - - this = THIS; - priv = this->private; - if (local->loc.inode) { - gf_uuid_copy(gfid, local->loc.inode->gfid); - base_inode = local->loc.inode; - } else if (local->resolver_base_inode) { - gf_uuid_copy(gfid, local->resolver_base_inode->gfid); - base_inode = local->resolver_base_inode; +delete_marker: + loc_wipe(&loc); + loc.inode = inode_ref(inode); + loc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); + if (ret) + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to delete %s " + "from /%s", + entry->d_name, GF_SHARD_REMOVE_ME_DIR); +err: + if (xattr_rsp) + dict_unref(xattr_rsp); + loc_wipe(&loc); + return ret; +} + +int shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, + gf_dirent_t *entry, inode_t *inode) { + int ret = -1; + loc_t loc = { + 0, + }; + shard_priv_t *priv = NULL; + + priv = this->private; + loc.inode = inode_ref(priv->dot_shard_rm_inode); + + ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } + goto out; + } + { ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); } + syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); +out: + loc_wipe(&loc); + return ret; +} + +int 
shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) { + SHARD_STACK_DESTROY(frame); + return 0; +} + +int shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, + shard_internal_dir_type_t type) { + int ret = 0; + char *bname = NULL; + loc_t *loc = NULL; + shard_priv_t *priv = NULL; + uuid_t gfid = { + 0, + }; + struct iatt stbuf = { + 0, + }; + + priv = this->private; + + switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + loc = &local->dot_shard_loc; + gf_uuid_copy(gfid, priv->dot_shard_gfid); + bname = GF_SHARD_DIR; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + loc = &local->dot_shard_rm_loc; + gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); + bname = GF_SHARD_REMOVE_ME_DIR; + break; + default: + break; + } + + loc->inode = inode_find(this->itable, gfid); + if (!loc->inode) { + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; + ret = dict_reset(local->xattr_req); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to reset " + "dict"); + ret = -ENOMEM; + goto err; + } + ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); + ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, local->xattr_req, + NULL); + if (ret < 0) { + if (ret != -ENOENT) + gf_msg(this->name, GF_LOG_ERROR, -ret, SHARD_MSG_SHARDS_DELETION_FAILED, + "Lookup on %s failed, exiting", bname); + goto err; } else { - gf_uuid_copy(gfid, local->base_gfid); + shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); } + } + ret = 0; +err: + return ret; +} + +int shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, + gf_dirent_t *entry) { + int ret = 0; + loc_t loc = { + 0, + }; + + loc.inode = inode_new(this->itable); + if (!loc.inode) { + ret = -ENOMEM; + goto err; + } + loc.parent = inode_ref(local->fd->inode); + + ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + 
"Inode path failed on %s", entry->d_name); + ret = -ENOMEM; + goto err; + } + + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; + + ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); + if (ret < 0) { + goto err; + } + entry->inode = inode_ref(loc.inode); + ret = 0; +err: + loc_wipe(&loc); + return ret; +} + +int shard_delete_shards(void *opaque) { + int ret = 0; + off_t offset = 0; + loc_t loc = { + 0, + }; + inode_t *link_inode = NULL; + xlator_t *this = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + gf_dirent_t entries; + gf_dirent_t *entry = NULL; + call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; + + this = THIS; + priv = this->private; + INIT_LIST_HEAD(&entries.list); + + cleanup_frame = opaque; + + local = mem_get0(this->local_pool); + if (!local) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create local to " + "delete shards"); + ret = -ENOMEM; + goto err; + } + cleanup_frame->local = local; + local->fop = GF_FOP_UNLINK; + + local->xattr_req = dict_new(); + if (!local->xattr_req) { + ret = -ENOMEM; + goto err; + } + local->deletion_rate = priv->deletion_rate; + + ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, ".shard absent. Nothing to" + " delete. Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } - shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname)); - - shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK); - linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf); - inode_lookup(linked_inode); - list_index = block_num - local->first_block; - local->inode_list[list_index] = linked_inode; - + ret = shard_resolve_internal_dir(this, local, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + if (ret == -ENOENT) { + gf_msg_debug(this->name, 0, ".remove_me absent. " + "Nothing to delete. 
Exiting"); + ret = 0; + goto err; + } else if (ret < 0) { + goto err; + } + + local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); + if (!local->fd) { + ret = -ENOMEM; + goto err; + } + + for (;;) { + offset = 0; LOCK(&priv->lock); { - fsync_inode = __shard_update_shards_inode_list( - linked_inode, this, base_inode, block_num, gfid); + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } } UNLOCK(&priv->lock); - if (fsync_inode) - shard_initiate_evicted_inode_fsync(this, fsync_inode); -} - -int -shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - int call_count = 0; - int shard_block_num = (long)cookie; - uuid_t gfid = { - 0, - }; - shard_local_t *local = NULL; - - local = frame->local; - if (local->resolver_base_inode) - gf_uuid_copy(gfid, local->resolver_base_inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - if (op_ret < 0) { - /* Ignore absence of shards in the backend in truncate fop. */ - switch (local->fop) { - case GF_FOP_TRUNCATE: - case GF_FOP_FTRUNCATE: - case GF_FOP_RENAME: - case GF_FOP_UNLINK: - if (op_errno == ENOENT) - goto done; - break; - case GF_FOP_WRITE: - case GF_FOP_READ: - case GF_FOP_ZEROFILL: - case GF_FOP_DISCARD: - case GF_FOP_FALLOCATE: - if ((!local->first_lookup_done) && (op_errno == ENOENT)) { - LOCK(&frame->lock); - { - local->create_count++; - } - UNLOCK(&frame->lock); - goto done; - } - break; - default: - break; - } - - /* else */ - gf_msg(this->name, GF_LOG_ERROR, op_errno, - SHARD_MSG_LOOKUP_SHARD_FAILED, - "Lookup on shard %d " - "failed. 
Base file gfid = %s", - shard_block_num, uuid_utoa(gfid)); - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } - - shard_link_block_inode(local, shard_block_num, inode, buf); - -done: - if (local->lookup_shards_barriered) { - syncbarrier_wake(&local->barrier); - return 0; - } else { - call_count = shard_call_count_return(frame); - if (call_count == 0) { - if (!local->first_lookup_done) - local->first_lookup_done = _gf_true; - local->pls_fop_handler(frame, this); - } - } - return 0; -} - -dict_t * -shard_create_gfid_dict(dict_t *dict) -{ - int ret = 0; - dict_t *new = NULL; - unsigned char *gfid = NULL; - - new = dict_copy_with_ref(dict, NULL); - if (!new) - return NULL; - - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char); - if (!gfid) { - ret = -1; - goto out; - } - - gf_uuid_generate(gfid); - - ret = dict_set_gfuuid(new, "gfid-req", gfid, false); - -out: - if (ret) { - dict_unref(new); - new = NULL; - GF_FREE(gfid); - } - - return new; -} + if (done) + break; + while ((ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, + &entries, local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry(entry, &entries.list, list) { + offset = entry->d_off; -int -shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode, - shard_post_lookup_shards_fop_handler_t handler) -{ - int i = 0; - int ret = 0; - int count = 0; - int call_count = 0; - int32_t shard_idx_iter = 0; - int last_block = 0; - char path[PATH_MAX] = { - 0, - }; - char *bname = NULL; - uuid_t gfid = { - 0, - }; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - gf_boolean_t wind_failed = _gf_false; - dict_t *xattr_req = NULL; - - priv = this->private; - local = frame->local; - count = call_count = local->call_count; - shard_idx_iter = local->first_block; - last_block = local->last_block; - local->pls_fop_handler = handler; - if (local->lookup_shards_barriered) - local->barrier.waitfor = 
local->call_count; - - if (inode) - gf_uuid_copy(gfid, inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); + if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) + continue; - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - i++; - shard_idx_iter++; + if (!entry->inode) { + ret = shard_lookup_marker_entry(this, local, entry); + if (ret < 0) continue; } + link_inode = inode_link(entry->inode, local->fd->inode, entry->d_name, + &entry->d_stat); - if (wind_failed) { - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, NULL, - NULL); - goto next; - } - - shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path)); - - bname = strrchr(path, '/') + 1; - loc.inode = inode_new(this->itable); - loc.parent = inode_ref(priv->dot_shard_inode); - gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s, base file gfid = %s", - bname, uuid_utoa(gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, NULL, - NULL); - goto next; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe(&loc); - shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter, - this, -1, ENOMEM, NULL, NULL, NULL, - NULL); - goto next; + gf_msg_debug(this->name, 0, "Initiating deletion of " + "shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, + link_inode); + inode_unlink(link_inode, local->fd->inode, entry->d_name); + inode_unref(link_inode); 
+ if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of gfid %s", entry->d_name); + continue; } + gf_msg(this->name, GF_LOG_INFO, 0, SHARD_MSG_SHARD_DELETION_COMPLETED, + "Deleted " + "shards of gfid=%s from backend", + entry->d_name); + } + gf_dirent_free(&entries); + if (ret) + break; + } + } + ret = 0; + loc_wipe(&loc); + return ret; - STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk, - (void *)(long)shard_idx_iter, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &loc, xattr_req); - loc_wipe(&loc); - dict_unref(xattr_req); - next: - shard_idx_iter++; - i++; - - if (!--call_count) - break; - } - if (local->lookup_shards_barriered) { - syncbarrier_wait(&local->barrier, count); - local->pls_fop_handler(frame, this); - } - return 0; +err: + LOCK(&priv->lock); + { priv->bg_del_state = SHARD_BG_DELETION_NONE; } + UNLOCK(&priv->lock); + loc_wipe(&loc); + return ret; +} + +int shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. 
Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; +} + +int shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) { + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *lock = NULL; + + local = frame->local; + lk_frame = local->inodelk_frame; + lk_local = lk_frame->local; + local->inodelk_frame = NULL; + loc = &local->int_inodelk.loc; + lock = &lk_local->int_inodelk; + lock->flock.l_type = F_UNLCK; + + STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, + &lock->flock, NULL); + local->int_inodelk.acquired_lock = _gf_false; + return 0; +} + +int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata); +int shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) { + int ret = 0; + loc_t *dst_loc = NULL; + loc_t tmp_loc = { + 0, + }; + shard_local_t *local = frame->local; + + if (local->dst_block_size) { + tmp_loc.parent = inode_ref(local->loc2.parent); + ret = inode_path(tmp_loc.parent, local->loc2.name, (char **)&tmp_loc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + " on pargfid=%s bname=%s", + uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + + tmp_loc.name = strrchr(tmp_loc.path, '/'); + if (tmp_loc.name) + tmp_loc.name++; + dst_loc = &tmp_loc; + } else { + dst_loc = &local->loc2; + } + + /* To-Do: Request open-fd count on dst base file */ + STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, + local->xattr_req); + loc_wipe(&tmp_loc); + return 0; +err: + 
loc_wipe(&tmp_loc); + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; +} + +int shard_unlink_base_file(call_frame_t *frame, xlator_t *this); + +int shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, + dict_t *xdata) { + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Xattrop on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, + local->newloc.name); + + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 0; +} + +int shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) { + int op_errno = ENOMEM; + uint64_t bs = 0; + dict_t *xdata = NULL; + shard_local_t *local = NULL; + + local = frame->local; + xdata = dict_new(); + if (!xdata) + goto err; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, + local->prebuf.ia_size, 0, err); + STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, &local->newloc, + GF_XATTROP_GET_AND_SET, xdata, NULL); + dict_unref(xdata); + return 0; +err: + if (xdata) + dict_unref(xdata); + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; } -int -shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if 
(local->op_ret < 0) { - if (local->op_errno == ENOENT) { - /* If lookup on /.shard fails with ENOENT, it means that - * the file was 0-byte in size but truncated sometime in - * the past to a higher size which is reflected in the - * size xattr, and now being truncated to a lower size. - * In this case, the only thing that needs to be done is - * to update the size xattr of the file and unwind. - */ - local->first_block = local->last_block = 0; - local->num_blocks = 1; - local->call_count = 0; - local->op_ret = 0; - local->postbuf.ia_size = local->offset; - shard_update_file_size(frame, this, local->fd, &local->loc, - shard_post_update_size_truncate_handler); - return 0; - } else { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } - } - - if (!local->call_count) - shard_truncate_do(frame, this); - else - shard_common_lookup_shards(frame, this, local->loc.inode, - shard_post_lookup_shards_truncate_handler); - - return 0; +int shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata, + struct iatt *postparent) { + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Lookup on marker file failed " + "while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; + } + + linked_inode = + inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; + shard_set_size_attrs_on_marker_file(frame, this); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); + return 0; } -int -shard_truncate_begin(call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - shard_local_t *local = NULL; - 
shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - /* First participant block here is the lowest numbered block that would - * hold the last byte of the file post successful truncation. - * Last participant block is the block that contains the last byte in - * the current state of the file. - * If (first block == last_block): - * then that means that the file only needs truncation of the - * first (or last since both are same) block. - * Else - * if (new_size % block_size == 0) - * then that means there is no truncate to be done with - * only shards from first_block + 1 through the last - * block needing to be unlinked. - * else - * both truncate of the first block and unlink of the - * remaining shards until end of file is required. - */ - local->first_block = (local->offset == 0) - ? 0 - : get_lowest_block(local->offset - 1, - local->block_size); - local->last_block = get_highest_block(0, local->prebuf.ia_size, - local->block_size); - - local->num_blocks = local->last_block - local->first_block + 1; - local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE) - ? 
local->loc.inode - : local->fd->inode; - - if ((local->first_block == 0) && (local->num_blocks == 1)) { - if (local->fop == GF_FOP_TRUNCATE) - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, &local->loc, - local->offset, local->xattr_req); - else - STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, local->fd, - local->offset, local->xattr_req); - return 0; - } +int shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) { + int op_errno = ENOMEM; + dict_t *xattr_req = NULL; + shard_local_t *local = NULL; - local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + local = frame->local; - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_internal_dir_loc(this, local, - SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto err; - shard_lookup_internal_dir(frame, this, - shard_post_resolve_truncate_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_post_resolve_truncate_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - return 0; + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; + STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); + dict_unref(xattr_req); + return 0; err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(local->fop, frame, -1, op_errno); + return 0; } -int -shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - struct iatt tmp_stbuf = { - 0, - }; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; 
- } - - local->postbuf = tmp_stbuf = local->prebuf; - - if (local->prebuf.ia_size == local->offset) { - /* If the file size is same as requested size, unwind the call - * immediately. - */ - if (local->fop == GF_FOP_TRUNCATE) - SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf, - &local->postbuf, NULL); - else - SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf, - &local->postbuf, NULL); - } else if (local->offset > local->prebuf.ia_size) { - /* If the truncate is from a lower to a higher size, set the - * new size xattr and unwind. - */ - local->hole_size = local->offset - local->prebuf.ia_size; - local->delta_size = 0; - GF_ATOMIC_INIT(local->delta_blocks, 0); - local->postbuf.ia_size = local->offset; - tmp_stbuf.ia_size = local->offset; - shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, - SHARD_INODE_WRITE_MASK); - shard_update_file_size(frame, this, NULL, &local->loc, - shard_post_update_size_truncate_handler); +int shard_create_marker_file_under_remove_me_cbk( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + inode_t *linked_inode = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (op_ret < 0) { + if ((op_errno != EEXIST) && (op_errno != ENODATA)) { + local->op_ret = op_ret; + local->op_errno = op_errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Marker file creation " + "failed while performing %s; entry gfid=%s", + gf_fop_string(local->fop), local->newloc.name); + goto err; } else { - /* ... else - * i. unlink all shards that need to be unlinked. - * ii. truncate the last of the shards. - * iii. update the new size using setxattr. - * and unwind the fop. 
- */ - local->hole_size = 0; - local->delta_size = (local->offset - local->prebuf.ia_size); - GF_ATOMIC_INIT(local->delta_blocks, 0); - tmp_stbuf.ia_size = local->offset; - shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0, - SHARD_INODE_WRITE_MASK); - shard_truncate_begin(frame, this); - } - return 0; -} - -/* TO-DO: - * Fix updates to size and block count with racing write(s) and truncate(s). - */ - -int -shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; + shard_lookup_marker_file(frame, this); + return 0; } + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); - return 0; - } - - if (!this->itable) - this->itable = loc->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - loc_copy(&local->loc, loc); - local->offset = offset; - local->block_size = block_size; - local->fop = GF_FOP_TRUNCATE; - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->resolver_base_inode = loc->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; + linked_inode = + inode_link(inode, priv->dot_shard_rm_inode, local->newloc.name, buf); + inode_unref(local->newloc.inode); + local->newloc.inode = linked_inode; + if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + else if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + return 0; err: - shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM); - return 0; -} - -int -shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; - } - - if (!this->itable) - this->itable = fd->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - local->fd = fd_ref(fd); - local->offset = offset; - local->block_size = block_size; - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FTRUNCATE; + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; +} + +int shard_create_marker_file_under_remove_me(call_frame_t *frame, + xlator_t *this, loc_t *loc) { + int ret = 0; + int op_errno = ENOMEM; + uint64_t bs = 0; + char g1[64] = { + 0, + }; + char g2[64] = { + 0, + }; + dict_t *xattr_req = NULL; + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + SHARD_SET_ROOT_FS_ID(frame, local); + + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) + goto err; + + local->newloc.inode = inode_new(this->itable); + local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); + ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), + (char **)&local->newloc.path); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed on " + "pargfid=%s bname=%s", + uuid_utoa_r(priv->dot_shard_rm_gfid, g1), + uuid_utoa_r(loc->inode->gfid, g2)); + goto err; + } + local->newloc.name = strrchr(local->newloc.path, '/'); + if (local->newloc.name) + local->newloc.name++; + + if (local->fop == GF_FOP_UNLINK) + bs = local->block_size; + else if (local->fop == GF_FOP_RENAME) + bs = local->dst_block_size; + + SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, + local->prebuf.ia_size, 0, err); + + STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, &local->newloc, + 0, 0, 0644, xattr_req); + dict_unref(xattr_req); + return 0; - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - local->resolver_base_inode = fd->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_truncate_handler); - return 0; err: - shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, 
-1, ENOMEM); - return 0; + if (xattr_req) + dict_unref(xattr_req); + shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, + NULL, NULL, NULL, NULL, NULL); + return 0; } -int -shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int ret = -1; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1) - goto unwind; - - ret = shard_inode_ctx_set(inode, this, buf, local->block_size, - SHARD_ALL_MASK); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, - "Failed to set inode " - "ctx for %s", - uuid_utoa(inode->gfid)); - -unwind: - SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); +int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); - return 0; -} +int shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { + int ret = 0; + shard_local_t *local = NULL; -int -shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, mode_t umask, dict_t *xdata) -{ - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; + local = frame->local; - priv = this->private; - local = mem_get0(this->local_pool); - if (!local) - goto err; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } else { + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + local->preoldparent = *preparent; + local->postoldparent = *postparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + if (local->cleanup_required) + shard_start_background_deletion(this); + } - frame->local = local; - local->block_size = priv->block_size; - if (!__is_gsyncd_on_shard_dir(frame, loc)) { - SHARD_INODE_CREATE_INIT(this, local->block_size, 
xdata, loc, 0, 0, err); + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; } + } - STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); - return 0; -err: - shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM); - return 0; -} - -int32_t -shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - shard_local_t *local = NULL; - - local = frame->local; - if (op_ret < 0) - goto err; - - shard_inode_ctx_set(inode, this, buf, 0, - SHARD_MASK_NLINK | SHARD_MASK_TIMES); - buf->ia_size = local->prebuf.ia_size; - buf->ia_blocks = local->prebuf.ia_blocks; - - SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, - postparent, xdata); - return 0; + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } + + shard_unlink_cbk(frame, this); + return 0; +} + +int shard_unlink_base_file(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = frame->local; + + /* To-Do: Request open-fd count on base file */ + STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, + local->xattr_req); + return 0; +} + +int shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + if (op_ret) + gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, + "Unlock failed. 
Please check brick logs for " + "more details"); + SHARD_STACK_DESTROY(frame); + return 0; +} + +int shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) { + loc_t *loc = NULL; + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_entrylk_t *lock = NULL; + + local = frame->local; + lk_frame = local->entrylk_frame; + lk_local = lk_frame->local; + local->entrylk_frame = NULL; + lock = &lk_local->int_entrylk; + loc = &lock->loc; + + STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, loc, + lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, + NULL); + local->int_entrylk.acquired_lock = _gf_false; + return 0; +} + +int shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_create_marker_file_under_remove_me(frame, this, + &local->int_inodelk.loc); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-entrylk handler not defined. 
This case should not" + " be hit"); + break; + } + return 0; +} + +int shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); + return 0; + } + main_local->int_entrylk.acquired_lock = _gf_true; + shard_post_entrylk_fop_handler(main_frame, this); + return 0; +} + +int shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, + uuid_t gfid) { + char gfid_str[GF_UUID_BUF_SIZE] = { + 0, + }; + shard_local_t *local = NULL; + shard_local_t *entrylk_local = NULL; + shard_entrylk_t *int_entrylk = NULL; + call_frame_t *entrylk_frame = NULL; + + local = frame->local; + entrylk_frame = create_frame(this, this->ctx->pool); + if (!entrylk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock marker file"); + goto err; + } + + entrylk_local = mem_get0(this->local_pool); + if (!entrylk_local) { + STACK_DESTROY(entrylk_frame->root); + goto err; + } + + entrylk_frame->local = entrylk_local; + entrylk_local->main_frame = frame; + int_entrylk = &entrylk_local->int_entrylk; + + int_entrylk->loc.inode = inode_ref(inode); + set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); + local->entrylk_frame = entrylk_frame; + gf_uuid_unparse(gfid, gfid_str); + int_entrylk->basename = gf_strdup(gfid_str); + + STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, + int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + return 0; err: - shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno); - return 0; -} - -int 
-shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL, - NULL, NULL, NULL, NULL); - return 0; - } - - STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2, - local->xattr_req); - return 0; + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } -int32_t -shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(oldloc->inode->gfid)); - goto err; - } - - if (!block_size) { - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, - oldloc, newloc, xdata); - return 0; - } - - if (!this->itable) - this->itable = oldloc->inode->table; - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_link_handler); - return 0; -err: - shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM); - return 0; -} - -int -shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode); - -int -shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if ((local->op_ret < 0) && (local->op_errno != ENOENT)) { - gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED, - "failed to delete shards of %s", - uuid_utoa(local->resolver_base_inode->gfid)); - return 0; - } - local->op_ret = 0; - local->op_errno = 0; - - shard_unlink_shards_do(frame, this, local->resolver_base_inode); - return 0; -} - -int -shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - local->lookup_shards_barriered = _gf_true; - - if (!local->call_count) - shard_unlink_shards_do(frame, this, local->resolver_base_inode); - else - shard_common_lookup_shards(frame, this, local->resolver_base_inode, - shard_post_lookup_shards_unlink_handler); - return 0; -} - -void -shard_unlink_block_inode(shard_local_t *local, int shard_block_num) -{ - char block_bname[256] = { - 0, - }; - uuid_t gfid = { - 0, - }; - inode_t *inode = NULL; - inode_t *base_inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *base_ictx = NULL; - int unref_base_inode = 0; - int unref_shard_inode = 0; - - this = THIS; - priv = this->private; - - inode = local->inode_list[shard_block_num - local->first_block]; - shard_inode_ctx_get(inode, this, &ctx); - base_inode = ctx->base_inode; - if (base_inode) - gf_uuid_copy(gfid, base_inode->gfid); - else - gf_uuid_copy(gfid, ctx->base_gfid); - shard_make_block_bname(shard_block_num, gfid, block_bname, - 
sizeof(block_bname)); - - LOCK(&priv->lock); - if (base_inode) - LOCK(&base_inode->lock); - LOCK(&inode->lock); - { - __shard_inode_ctx_get(inode, this, &ctx); - if (!list_empty(&ctx->ilist)) { - list_del_init(&ctx->ilist); - priv->inode_count--; - unref_base_inode++; - unref_shard_inode++; - GF_ASSERT(priv->inode_count >= 0); - } - if (ctx->fsync_needed) { - unref_base_inode++; - unref_shard_inode++; - list_del_init(&ctx->to_fsync_list); - if (base_inode) { - __shard_inode_ctx_get(base_inode, this, &base_ictx); - base_ictx->fsync_count--; - } - } - } - UNLOCK(&inode->lock); - if (base_inode) - UNLOCK(&base_inode->lock); - - inode_unlink(inode, priv->dot_shard_inode, block_bname); - inode_ref_reduce_by_n(inode, unref_shard_inode); - inode_forget(inode, 0); - - if (base_inode && unref_base_inode) - inode_ref_reduce_by_n(base_inode, unref_base_inode); - UNLOCK(&priv->lock); -} - -int -shard_rename_cbk(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->preoldparent, - &local->postoldparent, &local->prenewparent, - &local->postnewparent, local->xattr_rsp); - return 0; -} - -int32_t -shard_unlink_cbk(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = frame->local; - - SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, - &local->preoldparent, &local->postoldparent, - local->xattr_rsp); - return 0; -} - -int -shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int shard_block_num = (long)cookie; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto done; - } - - shard_unlink_block_inode(local, shard_block_num); -done: - syncbarrier_wake(&local->barrier); - return 0; -} - -int 
-shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode) -{ - int i = 0; - int ret = -1; - int count = 0; - uint32_t cur_block = 0; - uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */ - char *bname = NULL; - char path[PATH_MAX] = { - 0, - }; - uuid_t gfid = { - 0, - }; - loc_t loc = { - 0, - }; - gf_boolean_t wind_failed = _gf_false; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - if (inode) - gf_uuid_copy(gfid, inode->gfid); - else - gf_uuid_copy(gfid, local->base_gfid); - - for (i = 0; i < local->num_blocks; i++) { - if (!local->inode_list[i]) - continue; - count++; - } - - if (!count) { - /* callcount = 0 implies that all of the shards that need to be - * unlinked are non-existent (in other words the file is full of - * holes). - */ - gf_msg_debug(this->name, 0, - "All shards that need to be " - "unlinked are non-existent: %s", - uuid_utoa(gfid)); - return 0; - } - - SHARD_SET_ROOT_FS_ID(frame, local); - local->barrier.waitfor = count; - cur_block = cur_block_idx + local->first_block; - - while (cur_block_idx < local->num_blocks) { - if (!local->inode_list[cur_block_idx]) - goto next; - - if (wind_failed) { - shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - shard_make_block_abspath(cur_block, gfid, path, sizeof(path)); - bname = strrchr(path, '/') + 1; - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on %s, base file gfid = %s", - bname, uuid_utoa(gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - loc_wipe(&loc); - wind_failed = _gf_true; - shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - 
loc.name++; - loc.inode = inode_ref(local->inode_list[cur_block_idx]); - - STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk, - (void *)(long)cur_block, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &loc, local->xflag, - local->xattr_req); - loc_wipe(&loc); - next: - cur_block++; - cur_block_idx++; - } - syncbarrier_wait(&local->barrier, count); - SHARD_UNSET_ROOT_FS_ID(frame, local); - return 0; -} - -int -shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this, - int now, int first_block, gf_dirent_t *entry) -{ - int i = 0; - int ret = 0; - shard_local_t *local = NULL; - uuid_t gfid = { - 0, - }; - - local = cleanup_frame->local; - - local->inode_list = GF_CALLOC(now, sizeof(inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - return -ENOMEM; - - local->first_block = first_block; - local->last_block = first_block + now - 1; - local->num_blocks = now; - gf_uuid_parse(entry->d_name, gfid); - gf_uuid_copy(local->base_gfid, gfid); - local->resolver_base_inode = inode_find(this->itable, gfid); - local->call_count = 0; - ret = syncbarrier_init(&local->barrier); - if (ret) { - GF_FREE(local->inode_list); - local->inode_list = NULL; - inode_unref(local->resolver_base_inode); - local->resolver_base_inode = NULL; - return -errno; - } - shard_common_resolve_shards(cleanup_frame, this, - shard_post_resolve_unlink_handler); - - for (i = 0; i < local->num_blocks; i++) { - if (local->inode_list[i]) - inode_unref(local->inode_list[i]); - } - GF_FREE(local->inode_list); - local->inode_list = NULL; - if (local->op_ret) - ret = -local->op_errno; - syncbarrier_destroy(&local->barrier); - inode_unref(local->resolver_base_inode); - local->resolver_base_inode = NULL; - STACK_RESET(cleanup_frame->root); - return ret; -} - -int -__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, - gf_dirent_t *entry, inode_t *inode) -{ - int ret = 0; - int shard_count = 0; - int first_block = 0; - int now = 0; - uint64_t size = 0; - 
uint64_t block_size = 0; - uint64_t size_array[4] = { - 0, - }; - void *bsize = NULL; - void *size_attr = NULL; - dict_t *xattr_rsp = NULL; - loc_t loc = { - 0, - }; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = cleanup_frame->local; - ret = dict_reset(local->xattr_req); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to reset dict"); - ret = -ENOMEM; - goto err; - } - - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); - ret = -ENOMEM; - goto err; - } - - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); - ret = -ENOMEM; - goto err; - } - - loc.inode = inode_ref(inode); - loc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", entry->d_name); - ret = -ENOMEM; - goto err; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req, - &xattr_rsp); - if (ret) - goto err; - - ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE); - goto err; - } - block_size = ntoh64(*((uint64_t *)bsize)); - - ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE); - goto err; - } - - memcpy(size_array, 
size_attr, sizeof(size_array)); - size = ntoh64(size_array[0]); - - shard_count = (size / block_size) - 1; - if (shard_count < 0) { - gf_msg_debug(this->name, 0, - "Size of %s hasn't grown beyond " - "its shard-block-size. Nothing to delete. " - "Returning", - entry->d_name); - /* File size < shard-block-size, so nothing to delete */ - ret = 0; - goto delete_marker; - } - if ((size % block_size) > 0) - shard_count++; - - if (shard_count == 0) { - gf_msg_debug(this->name, 0, - "Size of %s is exactly equal to " - "its shard-block-size. Nothing to delete. " - "Returning", - entry->d_name); - ret = 0; - goto delete_marker; - } - gf_msg_debug(this->name, 0, - "base file = %s, " - "shard-block-size=%" PRIu64 ", file-size=%" PRIu64 - ", " - "shard_count=%d", - entry->d_name, block_size, size, shard_count); - - /* Perform a gfid-based lookup to see if gfid corresponding to marker - * file's base name exists. - */ - loc_wipe(&loc); - loc.inode = inode_new(this->itable); - if (!loc.inode) { - ret = -ENOMEM; - goto err; - } - gf_uuid_parse(entry->d_name, loc.gfid); - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); - if (!ret) { - gf_msg_debug(this->name, 0, - "Base shard corresponding to gfid " - "%s is present. Skipping shard deletion. 
" - "Returning", - entry->d_name); - ret = 0; - goto delete_marker; - } - - first_block = 1; - - while (shard_count) { - if (shard_count < local->deletion_rate) { - now = shard_count; - shard_count = 0; - } else { - now = local->deletion_rate; - shard_count -= local->deletion_rate; - } - - gf_msg_debug(this->name, 0, - "deleting %d shards starting from " - "block %d of gfid %s", - now, first_block, entry->d_name); - ret = shard_regulated_shards_deletion(cleanup_frame, this, now, - first_block, entry); - if (ret) - goto err; - first_block += now; - } - -delete_marker: - loc_wipe(&loc); - loc.inode = inode_ref(inode); - loc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", entry->d_name); - ret = -ENOMEM; - goto err; - } - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL); - if (ret) - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to delete %s " - "from /%s", - entry->d_name, GF_SHARD_REMOVE_ME_DIR); -err: - if (xattr_rsp) - dict_unref(xattr_rsp); - loc_wipe(&loc); - return ret; -} - -int -shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this, - gf_dirent_t *entry, inode_t *inode) -{ - int ret = -1; - loc_t loc = { - 0, - }; - shard_priv_t *priv = NULL; - - priv = this->private; - loc.inode = inode_ref(priv->dot_shard_rm_inode); - - ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL); - if (ret < 0) { - if (ret == -EAGAIN) { - ret = 0; - } - goto out; - } - { - ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode); - } - syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name, - ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL); -out: - loc_wipe(&loc); - return ret; -} - -int 
-shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data) -{ - SHARD_STACK_DESTROY(frame); - return 0; -} - -int -shard_resolve_internal_dir(xlator_t *this, shard_local_t *local, - shard_internal_dir_type_t type) -{ - int ret = 0; - char *bname = NULL; - loc_t *loc = NULL; - shard_priv_t *priv = NULL; - uuid_t gfid = { - 0, - }; - struct iatt stbuf = { - 0, - }; - - priv = this->private; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - loc = &local->dot_shard_loc; - gf_uuid_copy(gfid, priv->dot_shard_gfid); - bname = GF_SHARD_DIR; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - loc = &local->dot_shard_rm_loc; - gf_uuid_copy(gfid, priv->dot_shard_rm_gfid); - bname = GF_SHARD_REMOVE_ME_DIR; - break; - default: - break; - } - - loc->inode = inode_find(this->itable, gfid); - if (!loc->inode) { - ret = shard_init_internal_dir_loc(this, local, type); - if (ret) - goto err; - ret = dict_reset(local->xattr_req); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to reset " - "dict"); - ret = -ENOMEM; - goto err; - } - ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true); - ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, - local->xattr_req, NULL); - if (ret < 0) { - if (ret != -ENOENT) - gf_msg(this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Lookup on %s failed, exiting", bname); - goto err; - } else { - shard_link_internal_dir_inode(local, loc->inode, &stbuf, type); - } - } - ret = 0; -err: - return ret; -} - -int -shard_lookup_marker_entry(xlator_t *this, shard_local_t *local, - gf_dirent_t *entry) -{ - int ret = 0; - loc_t loc = { - 0, - }; - - loc.inode = inode_new(this->itable); - if (!loc.inode) { - ret = -ENOMEM; - goto err; - } - loc.parent = inode_ref(local->fd->inode); - - ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path)); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on %s", 
entry->d_name); - ret = -ENOMEM; - goto err; - } - - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - - ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL); - if (ret < 0) { - goto err; - } - entry->inode = inode_ref(loc.inode); - ret = 0; -err: - loc_wipe(&loc); - return ret; -} - -int -shard_delete_shards(void *opaque) -{ - int ret = 0; - off_t offset = 0; - loc_t loc = { - 0, - }; - inode_t *link_inode = NULL; - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - gf_dirent_t entries; - gf_dirent_t *entry = NULL; - call_frame_t *cleanup_frame = NULL; - gf_boolean_t done = _gf_false; - - this = THIS; - priv = this->private; - INIT_LIST_HEAD(&entries.list); - - cleanup_frame = opaque; - - local = mem_get0(this->local_pool); - if (!local) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create local to " - "delete shards"); - ret = -ENOMEM; - goto err; - } - cleanup_frame->local = local; - local->fop = GF_FOP_UNLINK; - - local->xattr_req = dict_new(); - if (!local->xattr_req) { - ret = -ENOMEM; - goto err; - } - local->deletion_rate = priv->deletion_rate; - - ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret == -ENOENT) { - gf_msg_debug(this->name, 0, - ".shard absent. Nothing to" - " delete. Exiting"); - ret = 0; - goto err; - } else if (ret < 0) { - goto err; - } - - ret = shard_resolve_internal_dir(this, local, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - if (ret == -ENOENT) { - gf_msg_debug(this->name, 0, - ".remove_me absent. " - "Nothing to delete. 
Exiting"); - ret = 0; - goto err; - } else if (ret < 0) { - goto err; - } - - local->fd = fd_anonymous(local->dot_shard_rm_loc.inode); - if (!local->fd) { - ret = -ENOMEM; - goto err; - } - - for (;;) { - offset = 0; - LOCK(&priv->lock); - { - if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { - priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; - } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { - priv->bg_del_state = SHARD_BG_DELETION_NONE; - done = _gf_true; - } - } - UNLOCK(&priv->lock); - if (done) - break; - while ( - (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset, - &entries, local->xattr_req, NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry(entry, &entries.list, list) - { - offset = entry->d_off; - - if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) - continue; - - if (!entry->inode) { - ret = shard_lookup_marker_entry(this, local, entry); - if (ret < 0) - continue; - } - link_inode = inode_link(entry->inode, local->fd->inode, - entry->d_name, &entry->d_stat); - - gf_msg_debug(this->name, 0, - "Initiating deletion of " - "shards of gfid %s", - entry->d_name); - ret = shard_delete_shards_of_entry(cleanup_frame, this, entry, - link_inode); - inode_unlink(link_inode, local->fd->inode, entry->d_name); - inode_unref(link_inode); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to clean up shards of gfid %s", - entry->d_name); - continue; - } - gf_msg(this->name, GF_LOG_INFO, 0, - SHARD_MSG_SHARD_DELETION_COMPLETED, - "Deleted " - "shards of gfid=%s from backend", - entry->d_name); - } - gf_dirent_free(&entries); - if (ret) - break; - } - } - ret = 0; - loc_wipe(&loc); - return ret; - -err: - LOCK(&priv->lock); - { - priv->bg_del_state = SHARD_BG_DELETION_NONE; - } - UNLOCK(&priv->lock); - loc_wipe(&loc); - return ret; -} - -int -shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t 
*xdata) -{ - if (op_ret) - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Unlock failed. Please check brick logs for " - "more details"); - SHARD_STACK_DESTROY(frame); - return 0; -} - -int -shard_unlock_inodelk(call_frame_t *frame, xlator_t *this) -{ - loc_t *loc = NULL; - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_inodelk_t *lock = NULL; - - local = frame->local; - lk_frame = local->inodelk_frame; - lk_local = lk_frame->local; - local->inodelk_frame = NULL; - loc = &local->int_inodelk.loc; - lock = &lk_local->int_inodelk; - lock->flock.l_type = F_UNLCK; - - STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK, - &lock->flock, NULL); - local->int_inodelk.acquired_lock = _gf_false; - return 0; -} - -int -shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata); -int -shard_rename_src_base_file(call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - loc_t *dst_loc = NULL; - loc_t tmp_loc = { - 0, - }; - shard_local_t *local = frame->local; - - if (local->dst_block_size) { - tmp_loc.parent = inode_ref(local->loc2.parent); - ret = inode_path(tmp_loc.parent, local->loc2.name, - (char **)&tmp_loc.path); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - " on pargfid=%s bname=%s", - uuid_utoa(tmp_loc.parent->gfid), local->loc2.name); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - - tmp_loc.name = strrchr(tmp_loc.path, '/'); - if (tmp_loc.name) - tmp_loc.name++; - dst_loc = &tmp_loc; - } else { - dst_loc = &local->loc2; - } - - /* To-Do: Request open-fd count on dst base file */ - STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc, - local->xattr_req); - loc_wipe(&tmp_loc); - return 0; -err: - loc_wipe(&tmp_loc); - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; -} - -int -shard_unlink_base_file(call_frame_t *frame, xlator_t *this); - -int -shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Xattrop on marker file failed " - "while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } - - inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode, - local->newloc.name); - - if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - else if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); - return 0; -} - -int -shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this) -{ - int op_errno = ENOMEM; - uint64_t bs = 0; - dict_t *xdata = NULL; - shard_local_t *local = NULL; - - local = frame->local; - xdata = dict_new(); - if (!xdata) - goto err; - - if (local->fop == GF_FOP_UNLINK) - bs = local->block_size; - else if (local->fop == GF_FOP_RENAME) - bs = local->dst_block_size; - SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc, - local->prebuf.ia_size, 0, err); - STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop, - &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL); - dict_unref(xdata); - return 0; -err: - if (xdata) - dict_unref(xdata); - shard_common_failure_unwind(local->fop, frame, -1, op_errno); - return 0; -} - -int 
-shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, - struct iatt *postparent) -{ - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - local = frame->local; - priv = this->private; - - if (op_ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Lookup on marker file failed " - "while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } - - linked_inode = inode_link(inode, priv->dot_shard_rm_inode, - local->newloc.name, buf); - inode_unref(local->newloc.inode); - local->newloc.inode = linked_inode; - shard_set_size_attrs_on_marker_file(frame, this); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, op_ret, op_errno); - return 0; -} - -int -shard_lookup_marker_file(call_frame_t *frame, xlator_t *this) -{ - int op_errno = ENOMEM; - dict_t *xattr_req = NULL; - shard_local_t *local = NULL; - - local = frame->local; - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) - goto err; - - STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req); - dict_unref(xattr_req); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, op_errno); - return 0; -} - -int -shard_create_marker_file_under_remove_me_cbk( - call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - inode_t *linked_inode = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - local = frame->local; - priv = this->private; - - SHARD_UNSET_ROOT_FS_ID(frame, local); - if (op_ret < 0) { - if ((op_errno != EEXIST) && (op_errno != ENODATA)) { - local->op_ret = op_ret; - local->op_errno = op_errno; - gf_msg(this->name, 
GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Marker file creation " - "failed while performing %s; entry gfid=%s", - gf_fop_string(local->fop), local->newloc.name); - goto err; - } else { - shard_lookup_marker_file(frame, this); - return 0; - } - } - - linked_inode = inode_link(inode, priv->dot_shard_rm_inode, - local->newloc.name, buf); - inode_unref(local->newloc.inode); - local->newloc.inode = linked_inode; - - if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - else if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; -} - -int -shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this, - loc_t *loc) -{ - int ret = 0; - int op_errno = ENOMEM; - uint64_t bs = 0; - char g1[64] = { - 0, - }; - char g2[64] = { - 0, - }; - dict_t *xattr_req = NULL; - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - SHARD_SET_ROOT_FS_ID(frame, local); - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) - goto err; - - local->newloc.inode = inode_new(this->itable); - local->newloc.parent = inode_ref(priv->dot_shard_rm_inode); - ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid), - (char **)&local->newloc.path); - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed on " - "pargfid=%s bname=%s", - uuid_utoa_r(priv->dot_shard_rm_gfid, g1), - uuid_utoa_r(loc->inode->gfid, g2)); - goto err; - } - local->newloc.name = strrchr(local->newloc.path, '/'); - if (local->newloc.name) - local->newloc.name++; - - if (local->fop == GF_FOP_UNLINK) - bs = local->block_size; - else if (local->fop == GF_FOP_RENAME) - bs = local->dst_block_size; - - SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc, - local->prebuf.ia_size, 0, err); - - STACK_WIND(frame, 
shard_create_marker_file_under_remove_me_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - &local->newloc, 0, 0, 0644, xattr_req); - dict_unref(xattr_req); - return 0; - -err: - if (xattr_req) - dict_unref(xattr_req); - shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno, - NULL, NULL, NULL, NULL, NULL); - return 0; -} - -int -shard_unlock_entrylk(call_frame_t *frame, xlator_t *this); - -int -shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preparent, struct iatt *postparent, - dict_t *xdata) -{ - int ret = 0; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } else { - local->preoldparent = *preparent; - local->postoldparent = *postparent; - if (xdata) - local->xattr_rsp = dict_ref(xdata); - if (local->cleanup_required) - shard_start_background_deletion(this); - } - - if (local->entrylk_frame) { - ret = shard_unlock_entrylk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } - } - - ret = shard_unlock_inodelk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } - - shard_unlink_cbk(frame, this); - return 0; -} - -int -shard_unlink_base_file(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = frame->local; - - /* To-Do: Request open-fd count on base file */ - STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag, - local->xattr_req); - return 0; -} - -int -shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - if (op_ret) - gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED, - "Unlock failed. 
Please check brick logs for " - "more details"); - SHARD_STACK_DESTROY(frame); - return 0; -} - -int -shard_unlock_entrylk(call_frame_t *frame, xlator_t *this) -{ - loc_t *loc = NULL; - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_entrylk_t *lock = NULL; - - local = frame->local; - lk_frame = local->entrylk_frame; - lk_local = lk_frame->local; - local->entrylk_frame = NULL; - lock = &lk_local->int_entrylk; - loc = &lock->loc; - - STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, this->name, loc, - lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, - NULL); - local->int_entrylk.acquired_lock = _gf_false; - return 0; -} - -int -shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - switch (local->fop) { - case GF_FOP_UNLINK: - case GF_FOP_RENAME: - shard_create_marker_file_under_remove_me(frame, this, - &local->int_inodelk.loc); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "post-entrylk handler not defined. 
This case should not" - " be hit"); - break; - } - return 0; -} - -int -shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - call_frame_t *main_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *main_local = NULL; - - local = frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(main_local->fop, main_frame, op_ret, - op_errno); - return 0; - } - main_local->int_entrylk.acquired_lock = _gf_true; - shard_post_entrylk_fop_handler(main_frame, this); - return 0; -} - -int -shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode, - uuid_t gfid) -{ - char gfid_str[GF_UUID_BUF_SIZE] = { - 0, - }; - shard_local_t *local = NULL; - shard_local_t *entrylk_local = NULL; - shard_entrylk_t *int_entrylk = NULL; - call_frame_t *entrylk_frame = NULL; - - local = frame->local; - entrylk_frame = create_frame(this, this->ctx->pool); - if (!entrylk_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to lock marker file"); - goto err; - } - - entrylk_local = mem_get0(this->local_pool); - if (!entrylk_local) { - STACK_DESTROY(entrylk_frame->root); - goto err; - } - - entrylk_frame->local = entrylk_local; - entrylk_local->main_frame = frame; - int_entrylk = &entrylk_local->int_entrylk; - - int_entrylk->loc.inode = inode_ref(inode); - set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root); - local->entrylk_frame = entrylk_frame; - gf_uuid_unparse(gfid, gfid_str); - int_entrylk->basename = gf_strdup(gfid_str); - - STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc, - int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; -} - -int 
-shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - - priv = this->private; - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; - } - - if (local->prebuf.ia_nlink > 1) { - gf_msg_debug(this->name, 0, - "link count on %s > 1:%d, " - "performing rename()/unlink()", - local->int_inodelk.loc.path, local->prebuf.ia_nlink); - if (local->fop == GF_FOP_RENAME) - shard_rename_src_base_file(frame, this); - else if (local->fop == GF_FOP_UNLINK) - shard_unlink_base_file(frame, this); - } else { - gf_msg_debug(this->name, 0, - "link count on %s = 1, creating " - "file under .remove_me", - local->int_inodelk.loc.path); - local->cleanup_required = _gf_true; - shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode, - local->prebuf.ia_gfid); - } - return 0; -} - -int -shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - switch (local->fop) { - case GF_FOP_UNLINK: - case GF_FOP_RENAME: - shard_lookup_base_file(frame, this, &local->int_inodelk.loc, - shard_post_lookup_base_shard_rm_handler); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "post-inodelk handler not defined. 
This case should not" - " be hit"); - break; - } - return 0; -} - -int -shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - call_frame_t *main_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *main_local = NULL; - - local = frame->local; - main_frame = local->main_frame; - main_local = main_frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(main_local->fop, main_frame, op_ret, - op_errno); - return 0; - } - main_local->int_inodelk.acquired_lock = _gf_true; - shard_post_inodelk_fop_handler(main_frame, this); - return 0; -} - -int -shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - call_frame_t *lk_frame = NULL; - shard_local_t *local = NULL; - shard_local_t *lk_local = NULL; - shard_inodelk_t *int_inodelk = NULL; - - local = frame->local; - lk_frame = create_frame(this, this->ctx->pool); - if (!lk_frame) { - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, - "Failed to create new frame " - "to lock base shard"); - goto err; - } - lk_local = mem_get0(this->local_pool); - if (!lk_local) { - STACK_DESTROY(lk_frame->root); - goto err; - } - - lk_frame->local = lk_local; - lk_local->main_frame = frame; - int_inodelk = &lk_local->int_inodelk; - - int_inodelk->flock.l_len = 0; - int_inodelk->flock.l_start = 0; - int_inodelk->domain = this->name; - int_inodelk->flock.l_type = F_WRLCK; - loc_copy(&local->int_inodelk.loc, loc); - set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); - local->inodelk_frame = lk_frame; - - STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, - &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); - return 0; -err: - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; -} - -int -shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) -{ - loc_t *loc = NULL; - 
shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; - } - if (local->fop == GF_FOP_UNLINK) - loc = &local->loc; - else if (local->fop == GF_FOP_RENAME) - loc = &local->loc2; - shard_acquire_inodelk(frame, this, loc); - return 0; -} - -int -shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t handler, - shard_internal_dir_type_t type); -int -shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); - return 0; - } - shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - return 0; -} - -void -shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) -{ - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = frame->local; - - local->dot_shard_rm_loc.inode = inode_find(this->itable, - priv->dot_shard_rm_gfid); - if (!local->dot_shard_rm_loc.inode) { - local->dot_shard_loc.inode = inode_find(this->itable, - priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_pre_mkdir_rm_handler; - shard_refresh_internal_dir(frame, this, - SHARD_INTERNAL_DIR_DOT_SHARD); - } - } else { - local->post_res_handler = shard_post_mkdir_rm_handler; - shard_refresh_internal_dir(frame, this, - SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); - } -} - -int -shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if ((ret) && 
(!IA_ISLNK(loc->inode->ia_type))) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; - } - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - - loc_copy(&local->loc, loc); - local->xflag = xflag; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - local->block_size = block_size; - local->resolver_base_inode = loc->inode; - local->fop = GF_FOP_UNLINK; - if (!this->itable) - this->itable = (local->loc.inode)->table; - - local->resolve_not = _gf_true; - shard_begin_rm_resolution(frame, this); - return 0; -err: - shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); - return 0; -} - -int -shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) -{ - shard_rename_cbk(frame, this); - return 0; -} - -int -shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent, - dict_t *xdata) -{ - int ret = 0; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto err; - } - /* Set ctx->refresh to TRUE to force a lookup on disk when - * shard_lookup_base_file() is called next to refresh the hard link - * count in ctx. Note that this is applicable only to the case where - * the rename dst is already existent and sharded. 
- */ - if ((local->dst_block_size) && (!local->cleanup_required)) - shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); - - local->prebuf = *buf; - local->preoldparent = *preoldparent; - local->postoldparent = *postoldparent; - local->prenewparent = *prenewparent; - local->postnewparent = *postnewparent; - if (xdata) - local->xattr_rsp = dict_ref(xdata); - - if (local->dst_block_size) { - if (local->entrylk_frame) { - ret = shard_unlock_entrylk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } - } - - ret = shard_unlock_inodelk(frame, this); - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - goto err; - } - if (local->cleanup_required) - shard_start_background_deletion(this); - } - - /* Now the base file of src, if sharded, is looked up to gather ia_size - * and ia_blocks.*/ - if (local->block_size) { - local->tmp_loc.inode = inode_new(this->itable); - gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); - shard_lookup_base_file(frame, this, &local->tmp_loc, - shard_post_rename_lookup_handler); - } else { - shard_rename_cbk(frame, this); - } - return 0; -err: - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; -} - -int -shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } - - /* Save dst base file attributes into postbuf so the information is not - * lost when it is overwritten after lookup on base file of src in - * shard_lookup_base_file_cbk(). 
- */ - local->postbuf = local->prebuf; - shard_rename_src_base_file(frame, this); - return 0; -} - -int -shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, - dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - uint64_t dst_block_size = 0; - shard_local_t *local = NULL; - - if (IA_ISDIR(oldloc->inode->ia_type)) { - STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; - } - - ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); - if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size from inode ctx of %s", - uuid_utoa(oldloc->inode->gfid)); - goto err; - } - - if (newloc->inode) - ret = shard_inode_ctx_get_block_size(newloc->inode, this, - &dst_block_size); - - /* The following stack_wind covers the case where: - * a. the src file is not sharded and dst doesn't exist, OR - * b. the src and dst both exist but are not sharded. - */ - if (((!block_size) && (!dst_block_size)) || - frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); - return 0; - } - - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - loc_copy(&local->loc, oldloc); - loc_copy(&local->loc2, newloc); - local->resolver_base_inode = newloc->inode; - local->fop = GF_FOP_RENAME; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - - local->block_size = block_size; - local->dst_block_size = dst_block_size; - if (!this->itable) - this->itable = (local->loc.inode)->table; - local->resolve_not = _gf_true; - - /* The following if-block covers the case where the dst file exists - * and is sharded. 
- */ - if (local->dst_block_size) { - shard_begin_rm_resolution(frame, this); - } else { - /* The following block covers the case where the dst either doesn't - * exist or is NOT sharded but the src is sharded. In this case, shard - * xlator would go ahead and rename src to dst. Once done, it would also - * lookup the base shard of src to get the ia_size and ia_blocks xattr - * values. - */ - shard_rename_src_base_file(frame, this); - } - return 0; - -err: - shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); - return 0; -} - -int -shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int ret = -1; - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret == -1) - goto unwind; - - ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, - SHARD_ALL_MASK); - if (ret) - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, - "Failed to set inode " - "ctx for %s", - uuid_utoa(inode->gfid)); - -unwind: - SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, - preparent, postparent, xdata); - return 0; -} - -int -shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) -{ - shard_priv_t *priv = NULL; - shard_local_t *local = NULL; - - priv = this->private; - local = mem_get0(this->local_pool); - if (!local) - goto err; - - frame->local = local; - local->block_size = priv->block_size; - - if (!__is_gsyncd_on_shard_dir(frame, loc)) { - SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); - } - - STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, - xdata); - return 0; -err: - shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); - return 0; -} - -int 
-shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) -{ - /* To-Do: Handle open with O_TRUNC under locks */ - SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); - return 0; -} - -int -shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) -{ - STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); - return 0; -} - -int -shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref, - dict_t *xdata) -{ - int i = 0; - int call_count = 0; - void *address = NULL; - uint64_t block_num = 0; - off_t off = 0; - struct iovec vec = { - 0, - }; - shard_local_t *local = NULL; - fd_t *anon_fd = cookie; - shard_inode_ctx_t *ctx = NULL; - - local = frame->local; - - /* If shard has already seen a failure here before, there is no point - * in aggregating subsequent reads, so just go to out. 
- */ - if (local->op_ret < 0) - goto out; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto out; - } - - if (local->op_ret >= 0) - local->op_ret += op_ret; - - shard_inode_ctx_get(anon_fd->inode, this, &ctx); - block_num = ctx->block_num; - - if (block_num == local->first_block) { - address = local->iobuf->ptr; - } else { - /* else - * address to start writing to = beginning of buffer + - * number of bytes until end of first block + - * + block_size times number of blocks - * between the current block and the first - */ - address = (char *)local->iobuf->ptr + - (local->block_size - (local->offset % local->block_size)) + - ((block_num - local->first_block - 1) * local->block_size); - } - - for (i = 0; i < count; i++) { - address = (char *)address + off; - memcpy(address, vector[i].iov_base, vector[i].iov_len); - off += vector[i].iov_len; - } - -out: - if (anon_fd) - fd_unref(anon_fd); - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - } else { - if (xdata) - local->xattr_rsp = dict_ref(xdata); - vec.iov_base = local->iobuf->ptr; - vec.iov_len = local->total_size; - local->op_ret = local->total_size; - SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, - &vec, 1, &local->prebuf, local->iobref, - local->xattr_rsp); - return 0; - } - } - - return 0; -} - -int -shard_readv_do(call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int call_count = 0; - int last_block = 0; - int cur_block = 0; - off_t orig_offset = 0; - off_t shard_offset = 0; - size_t read_size = 0; - size_t remaining_size = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - - local = frame->local; - fd = local->fd; - - orig_offset = local->offset; - cur_block = local->first_block; - last_block = local->last_block; - 
remaining_size = local->total_size; - local->call_count = call_count = local->num_blocks; - - SHARD_SET_ROOT_FS_ID(frame, local); - - if (fd->flags & O_DIRECT) - local->flags = O_DIRECT; - - while (cur_block <= last_block) { - if (wind_failed) { - shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, - 0, NULL, NULL, NULL); - goto next; - } - - shard_offset = orig_offset % local->block_size; - read_size = local->block_size - shard_offset; - if (read_size > remaining_size) - read_size = remaining_size; - - remaining_size -= read_size; - - if (cur_block == 0) { - anon_fd = fd_ref(fd); - } else { - anon_fd = fd_anonymous(local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, - ENOMEM, NULL, 0, NULL, NULL, NULL); - goto next; - } - } +int shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; - STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, anon_fd, read_size, - shard_offset, local->flags, local->xattr_req); + priv = this->private; + local = frame->local; - orig_offset += read_size; - next: - cur_block++; - i++; - call_count--; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; + } + + if (local->prebuf.ia_nlink > 1) { + gf_msg_debug(this->name, 0, "link count on %s > 1:%d, " + "performing rename()/unlink()", + local->int_inodelk.loc.path, local->prebuf.ia_nlink); + if (local->fop == GF_FOP_RENAME) + shard_rename_src_base_file(frame, this); + else if (local->fop == GF_FOP_UNLINK) + shard_unlink_base_file(frame, this); + } else { + gf_msg_debug(this->name, 0, "link count on %s = 1, creating " + "file under .remove_me", + local->int_inodelk.loc.path); + local->cleanup_required = _gf_true; + shard_acquire_entrylk(frame, this, 
priv->dot_shard_rm_inode, + local->prebuf.ia_gfid); + } + return 0; +} + +int shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; + + local = frame->local; + + switch (local->fop) { + case GF_FOP_UNLINK: + case GF_FOP_RENAME: + shard_lookup_base_file(frame, this, &local->int_inodelk.loc, + shard_post_lookup_base_shard_rm_handler); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "post-inodelk handler not defined. This case should not" + " be hit"); + break; + } + return 0; +} + +int shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + call_frame_t *main_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *main_local = NULL; + + local = frame->local; + main_frame = local->main_frame; + main_local = main_frame->local; + + if (local->op_ret < 0) { + shard_common_failure_unwind(main_local->fop, main_frame, op_ret, op_errno); + return 0; + } + main_local->int_inodelk.acquired_lock = _gf_true; + shard_post_inodelk_fop_handler(main_frame, this); + return 0; +} + +int shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc) { + call_frame_t *lk_frame = NULL; + shard_local_t *local = NULL; + shard_local_t *lk_local = NULL; + shard_inodelk_t *int_inodelk = NULL; + + local = frame->local; + lk_frame = create_frame(this, this->ctx->pool); + if (!lk_frame) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create new frame " + "to lock base shard"); + goto err; + } + lk_local = mem_get0(this->local_pool); + if (!lk_local) { + STACK_DESTROY(lk_frame->root); + goto err; + } + + lk_frame->local = lk_local; + lk_local->main_frame = frame; + int_inodelk = &lk_local->int_inodelk; + + int_inodelk->flock.l_len = 0; + int_inodelk->flock.l_start = 0; + int_inodelk->domain = this->name; + int_inodelk->flock.l_type = F_WRLCK; + loc_copy(&local->int_inodelk.loc, loc); + 
set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root); + local->inodelk_frame = lk_frame; + + STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain, + &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL); + return 0; +err: + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; } -int -shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - int shard_block_num = (long)cookie; - int call_count = 0; - shard_local_t *local = NULL; +int shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { + loc_t *loc = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) { - if (op_errno == EEXIST) { - LOCK(&frame->lock); - { - local->eexist_count++; - } - UNLOCK(&frame->lock); - } else { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - gf_msg_debug(this->name, 0, - "mknod of shard %d " - "failed: %s", - shard_block_num, strerror(op_errno)); - goto done; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); + return 0; + } + if (local->fop == GF_FOP_UNLINK) + loc = &local->loc; + else if (local->fop == GF_FOP_RENAME) + loc = &local->loc2; + shard_acquire_inodelk(frame, this, loc); + return 0; +} - shard_link_block_inode(local, shard_block_num, inode, buf); +int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type); +int shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; -done: - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - local->create_count = 0; - local->post_mknod_handler(frame, this); - } + local = 
frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, -1, local->op_errno); return 0; + } + shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + return 0; } -int -shard_common_resume_mknod(call_frame_t *frame, xlator_t *this, - shard_post_mknod_fop_handler_t post_mknod_handler) -{ - int i = 0; - int shard_idx_iter = 0; - int last_block = 0; - int ret = 0; - int call_count = 0; - char path[PATH_MAX] = { - 0, - }; - mode_t mode = 0; - char *bname = NULL; - shard_priv_t *priv = NULL; - shard_inode_ctx_t ctx_tmp = { - 0, - }; - shard_local_t *local = NULL; - gf_boolean_t wind_failed = _gf_false; - fd_t *fd = NULL; - loc_t loc = { - 0, - }; - dict_t *xattr_req = NULL; - - local = frame->local; - priv = this->private; - fd = local->fd; - shard_idx_iter = local->first_block; - last_block = local->last_block; - call_count = local->call_count = local->create_count; - local->post_mknod_handler = post_mknod_handler; +void shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this) { + shard_priv_t *priv = NULL; + shard_local_t *local = NULL; - SHARD_SET_ROOT_FS_ID(frame, local); + priv = this->private; + local = frame->local; - ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get inode " - "ctx for %s", - uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - goto err; - } - mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); + local->dot_shard_rm_loc.inode = + inode_find(this->itable, priv->dot_shard_rm_gfid); + if (!local->dot_shard_rm_loc.inode) { + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_pre_mkdir_rm_handler; + 
shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + } else { + local->post_res_handler = shard_post_mkdir_rm_handler; + shard_refresh_internal_dir(frame, this, + SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME); + } +} + +int shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, + dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + loc_copy(&local->loc, loc); + local->xflag = xflag; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + local->block_size = block_size; + local->resolver_base_inode = loc->inode; + local->fop = GF_FOP_UNLINK; + if (!this->itable) + this->itable = (local->loc.inode)->table; + + local->resolve_not = _gf_true; + shard_begin_rm_resolution(frame, this); + return 0; +err: + shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM); + return 0; +} - while (shard_idx_iter <= last_block) { - if (local->inode_list[i]) { - shard_idx_iter++; - i++; - continue; - } +int shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this) { + shard_rename_cbk(frame, this); + return 0; +} - if (wind_failed) { - shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; - } +int shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { + int ret = 0; + shard_local_t *local = NULL; - shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, - sizeof(path)); - - xattr_req = shard_create_gfid_dict(local->xattr_req); - if (!xattr_req) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; - } + local = frame->local; - bname = strrchr(path, '/') + 1; - loc.inode = inode_new(this->itable); - loc.parent = inode_ref(priv->dot_shard_inode); - ret = inode_path(loc.parent, bname, (char **)&(loc.path)); - if (ret < 0 || !(loc.inode)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, - "Inode path failed" - "on %s, base file gfid = %s", - bname, uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - loc_wipe(&loc); - dict_unref(xattr_req); - shard_common_mknod_cbk(frame, (void 
*)(long)shard_idx_iter, this, - -1, ENOMEM, NULL, NULL, NULL, NULL, NULL); - goto next; - } + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto err; + } + /* Set ctx->refresh to TRUE to force a lookup on disk when + * shard_lookup_base_file() is called next to refresh the hard link + * count in ctx. Note that this is applicable only to the case where + * the rename dst is already existent and sharded. + */ + if ((local->dst_block_size) && (!local->cleanup_required)) + shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this); + + local->prebuf = *buf; + local->preoldparent = *preoldparent; + local->postoldparent = *postoldparent; + local->prenewparent = *prenewparent; + local->postnewparent = *postnewparent; + if (xdata) + local->xattr_rsp = dict_ref(xdata); - loc.name = strrchr(loc.path, '/'); - if (loc.name) - loc.name++; - - STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, - (void *)(long)shard_idx_iter, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, &loc, mode, - ctx_tmp.stat.ia_rdev, 0, xattr_req); - loc_wipe(&loc); - dict_unref(xattr_req); - - next: - shard_idx_iter++; - i++; - if (!--call_count) - break; + if (local->dst_block_size) { + if (local->entrylk_frame) { + ret = shard_unlock_entrylk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } } - return 0; + ret = shard_unlock_inodelk(frame, this); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + goto err; + } + if (local->cleanup_required) + shard_start_background_deletion(this); + } + + /* Now the base file of src, if sharded, is looked up to gather ia_size + * and ia_blocks.*/ + if (local->block_size) { + local->tmp_loc.inode = inode_new(this->itable); + gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid); + shard_lookup_base_file(frame, this, &local->tmp_loc, + shard_post_rename_lookup_handler); + } else { + shard_rename_cbk(frame, this); + } + return 0; err: - /* - * This block is for handling 
failure in shard_inode_ctx_get_all(). - * Failures in the while-loop are handled within the loop. - */ - SHARD_UNSET_ROOT_FS_ID(frame, local); - post_mknod_handler(frame, this); - return 0; + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; } -int -shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); - -int -shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } +int shard_post_lookup_dst_base_file_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - if (local->create_count) { - shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); - } else { - shard_readv_do(frame, this); - } + local = frame->local; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } + + /* Save dst base file attributes into postbuf so the information is not + * lost when it is overwritten after lookup on base file of src in + * shard_lookup_base_file_cbk(). 
+ */ + local->postbuf = local->prebuf; + shard_rename_src_base_file(frame, this); + return 0; +} + +int shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + uint64_t dst_block_size = 0; + shard_local_t *local = NULL; + + if (IA_ISDIR(oldloc->inode->ia_type)) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size); + if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size from inode ctx of %s", + uuid_utoa(oldloc->inode->gfid)); + goto err; + } + + if (newloc->inode) + ret = shard_inode_ctx_get_block_size(newloc->inode, this, &dst_block_size); + + /* The following stack_wind covers the case where: + * a. the src file is not sharded and dst doesn't exist, OR + * b. the src and dst both exist but are not sharded. + */ + if (((!block_size) && (!dst_block_size)) || + frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + loc_copy(&local->loc, oldloc); + loc_copy(&local->loc2, newloc); + local->resolver_base_inode = newloc->inode; + local->fop = GF_FOP_RENAME; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->block_size = block_size; + local->dst_block_size = dst_block_size; + if (!this->itable) + this->itable = (local->loc.inode)->table; + local->resolve_not = _gf_true; + + /* The following if-block covers the case where the dst file exists + * and is sharded. 
+ */ + if (local->dst_block_size) { + shard_begin_rm_resolution(frame, this); + } else { + /* The following block covers the case where the dst either doesn't + * exist or is NOT sharded but the src is sharded. In this case, shard + * xlator would go ahead and rename src to dst. Once done, it would also + * lookup the base shard of src to get the ia_size and ia_blocks xattr + * values. + */ + shard_rename_src_base_file(frame, this); + } + return 0; + +err: + shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM); + return 0; } -int -shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + int ret = -1; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } + if (op_ret == -1) + goto unwind; - if (!local->eexist_count) { - shard_readv_do(frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards(frame, this, local->loc.inode, - shard_post_lookup_shards_readv_handler); - } - return 0; + ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size, + SHARD_ALL_MASK); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED, + "Failed to set inode " + "ctx for %s", + uuid_utoa(inode->gfid)); + +unwind: + SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf, + preparent, postparent, xdata); + return 0; } -int -shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { + shard_priv_t 
*priv = NULL; + shard_local_t *local = NULL; - local = frame->local; + priv = this->private; + local = mem_get0(this->local_pool); + if (!local) + goto err; - if (local->op_ret < 0) { - if (local->op_errno != ENOENT) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; - } else { - struct iovec vec = { - 0, - }; - - vec.iov_base = local->iobuf->ptr; - vec.iov_len = local->total_size; - local->op_ret = local->total_size; - SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, - &local->prebuf, local->iobref, NULL); - return 0; - } - } + frame->local = local; + local->block_size = priv->block_size; - if (local->call_count) { - shard_common_lookup_shards(frame, this, local->resolver_base_inode, - shard_post_lookup_shards_readv_handler); - } else { - shard_readv_do(frame, this); - } + if (!__is_gsyncd_on_shard_dir(frame, loc)) { + SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err); + } - return 0; -} + STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +err: + shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM); + return 0; +} + +int shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { + /* To-Do: Handle open with O_TRUNC under locks */ + SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { + STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { + int i = 0; + int call_count = 0; + void *address = NULL; + 
uint64_t block_num = 0; + off_t off = 0; + struct iovec vec = { + 0, + }; + shard_local_t *local = NULL; + fd_t *anon_fd = cookie; + shard_inode_ctx_t *ctx = NULL; + + local = frame->local; + + /* If shard has already seen a failure here before, there is no point + * in aggregating subsequent reads, so just go to out. + */ + if (local->op_ret < 0) + goto out; + + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto out; + } + + if (local->op_ret >= 0) + local->op_ret += op_ret; -int -shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - struct iobuf *iobuf = NULL; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; + shard_inode_ctx_get(anon_fd->inode, this, &ctx); + block_num = ctx->block_num; + + if (block_num == local->first_block) { + address = local->iobuf->ptr; + } else { + /* else + * address to start writing to = beginning of buffer + + * number of bytes until end of first block + + * + block_size times number of blocks + * between the current block and the first + */ + address = (char *)local->iobuf->ptr + + (local->block_size - (local->offset % local->block_size)) + + ((block_num - local->first_block - 1) * local->block_size); + } - priv = this->private; - local = frame->local; + for (i = 0; i < count; i++) { + address = (char *)address + off; + memcpy(address, vector[i].iov_base, vector[i].iov_len); + off += vector[i].iov_len; + } +out: + if (anon_fd) + fd_unref(anon_fd); + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, - local->op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + } else { + if (xdata) + local->xattr_rsp = dict_ref(xdata); + vec.iov_base = local->iobuf->ptr; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, 
frame, local->op_ret, local->op_errno, &vec, 1, + &local->prebuf, local->iobref, local->xattr_rsp); + return 0; + } + } + + return 0; +} + +int shard_readv_do(call_frame_t *frame, xlator_t *this) { + int i = 0; + int call_count = 0; + int last_block = 0; + int cur_block = 0; + off_t orig_offset = 0; + off_t shard_offset = 0; + size_t read_size = 0; + size_t remaining_size = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + cur_block = local->first_block; + last_block = local->last_block; + remaining_size = local->total_size; + local->call_count = call_count = local->num_blocks; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (fd->flags & O_DIRECT) + local->flags = O_DIRECT; + + while (cur_block <= last_block) { + if (wind_failed) { + shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL, 0, + NULL, NULL, NULL); + goto next; + } + + shard_offset = orig_offset % local->block_size; + read_size = local->block_size - shard_offset; + if (read_size > remaining_size) + read_size = remaining_size; + + remaining_size -= read_size; + + if (cur_block == 0) { + anon_fd = fd_ref(fd); + } else { + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, NULL, + 0, NULL, NULL, NULL); + goto next; + } } - if (local->offset >= local->prebuf.ia_size) { - /* If the read is being performed past the end of the file, - * unwind the FOP with 0 bytes read as status. 
- */ - struct iovec vec = { - 0, - }; - - iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); - if (!iobuf) - goto err; - - vec.iov_base = iobuf->ptr; - vec.iov_len = 0; - local->iobref = iobref_new(); - iobref_add(local->iobref, iobuf); - iobuf_unref(iobuf); - - SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, - local->iobref, NULL); - return 0; - } + STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, anon_fd, read_size, + shard_offset, local->flags, local->xattr_req); + + orig_offset += read_size; + next: + cur_block++; + i++; + call_count--; + } + return 0; +} - local->first_block = get_lowest_block(local->offset, local->block_size); +int shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + int shard_block_num = (long)cookie; + int call_count = 0; + shard_local_t *local = NULL; + + local = frame->local; + + if (op_ret < 0) { + if (op_errno == EEXIST) { + LOCK(&frame->lock); + { local->eexist_count++; } + UNLOCK(&frame->lock); + } else { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + gf_msg_debug(this->name, 0, "mknod of shard %d " + "failed: %s", + shard_block_num, strerror(op_errno)); + goto done; + } - local->total_size = local->req_size; + shard_link_block_inode(local, shard_block_num, inode, buf); - local->last_block = get_highest_block(local->offset, local->total_size, - local->block_size); +done: + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + local->create_count = 0; + local->post_mknod_handler(frame, this); + } + + return 0; +} + +int shard_common_resume_mknod( + call_frame_t *frame, xlator_t *this, + shard_post_mknod_fop_handler_t post_mknod_handler) { + int i = 0; + int shard_idx_iter = 0; + int last_block = 0; + int ret = 0; + int 
call_count = 0; + char path[PATH_MAX] = { + 0, + }; + mode_t mode = 0; + char *bname = NULL; + shard_priv_t *priv = NULL; + shard_inode_ctx_t ctx_tmp = { + 0, + }; + shard_local_t *local = NULL; + gf_boolean_t wind_failed = _gf_false; + fd_t *fd = NULL; + loc_t loc = { + 0, + }; + dict_t *xattr_req = NULL; + + local = frame->local; + priv = this->private; + fd = local->fd; + shard_idx_iter = local->first_block; + last_block = local->last_block; + call_count = local->call_count = local->create_count; + local->post_mknod_handler = post_mknod_handler; + + SHARD_SET_ROOT_FS_ID(frame, local); + + ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get inode " + "ctx for %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + goto err; + } + mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type); - local->num_blocks = local->last_block - local->first_block + 1; - local->resolver_base_inode = local->loc.inode; + while (shard_idx_iter <= last_block) { + if (local->inode_list[i]) { + shard_idx_iter++; + i++; + continue; + } - local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) - goto err; + if (wind_failed) { + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, + ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; + } - iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); - if (!iobuf) - goto err; + shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path, + sizeof(path)); - local->iobref = iobref_new(); - if (!local->iobref) { - iobuf_unref(iobuf); - goto err; + xattr_req = shard_create_gfid_dict(local->xattr_req); + if (!xattr_req) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, + ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto 
next; } - if (iobref_add(local->iobref, iobuf) != 0) { - iobuf_unref(iobuf); - goto err; + bname = strrchr(path, '/') + 1; + loc.inode = inode_new(this->itable); + loc.parent = inode_ref(priv->dot_shard_inode); + ret = inode_path(loc.parent, bname, (char **)&(loc.path)); + if (ret < 0 || !(loc.inode)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED, + "Inode path failed" + "on %s, base file gfid = %s", + bname, uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + loc_wipe(&loc); + dict_unref(xattr_req); + shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this, -1, + ENOMEM, NULL, NULL, NULL, NULL, NULL); + goto next; } - memset(iobuf->ptr, 0, local->total_size); - iobuf_unref(iobuf); - local->iobuf = iobuf; + loc.name = strrchr(loc.path, '/'); + if (loc.name) + loc.name++; - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); - if (!local->dot_shard_loc.inode) { - ret = shard_init_internal_dir_loc(this, local, - SHARD_INTERNAL_DIR_DOT_SHARD); - if (ret) - goto err; - shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - local->post_res_handler = shard_post_resolve_readv_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } - return 0; + STACK_WIND_COOKIE(frame, shard_common_mknod_cbk, + (void *)(long)shard_idx_iter, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, &loc, mode, + ctx_tmp.stat.ia_rdev, 0, xattr_req); + loc_wipe(&loc); + dict_unref(xattr_req); + + next: + shard_idx_iter++; + i++; + if (!--call_count) + break; + } + + return 0; err: - shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); - return 0; + /* + * This block is for handling failure in shard_inode_ctx_get_all(). + * Failures in the while-loop are handled within the loop. 
+ */ + SHARD_UNSET_ROOT_FS_ID(frame, local); + post_mknod_handler(frame, this); + return 0; } -int -shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) -{ - int ret = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this); - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto err; - } +int shard_post_lookup_shards_readv_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. - */ - STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, - xdata); - return 0; - } + local = frame->local; - if (!this->itable) - this->itable = fd->inode->table; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - local = mem_get0(this->local_pool); - if (!local) - goto err; + if (local->create_count) { + shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler); + } else { + shard_readv_do(frame, this); + } - frame->local = local; + return 0; +} - ret = syncbarrier_init(&local->barrier); - if (ret) - goto err; - local->fd = fd_ref(fd); - local->block_size = block_size; - local->offset = offset; - local->req_size = size; - local->flags = flags; - local->fop = GF_FOP_READ; - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; +int shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local = frame->local; - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_readv_handler); - return 0; -err: - shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); return 0; + } + + if (!local->eexist_count) { + shard_readv_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards(frame, this, local->loc.inode, + shard_post_lookup_shards_readv_handler); + } + return 0; } -int -shard_common_inode_write_post_update_size_handler(call_frame_t *frame, - xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); + if (local->op_ret < 0) { + if (local->op_errno != ENOENT) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; } else { - shard_common_inode_write_success_unwind(local->fop, frame, - local->written_size); + struct iovec vec = { + 0, + }; + + vec.iov_base = local->iobuf->ptr; + vec.iov_len = local->total_size; + local->op_ret = local->total_size; + SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1, + &local->prebuf, local->iobref, NULL); + return 0; } - return 0; -} + } -static gf_boolean_t -shard_is_appending_write(shard_local_t *local) -{ - if (local->fop != GF_FOP_WRITE) - return _gf_false; - if (local->flags & O_APPEND) - return _gf_true; - if (local->fd->flags & O_APPEND) - return _gf_true; - return _gf_false; 
+ if (local->call_count) { + shard_common_lookup_shards(frame, this, local->resolver_base_inode, + shard_post_lookup_shards_readv_handler); + } else { + shard_readv_do(frame, this); + } + + return 0; } -int -__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; +int shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this) { + int ret = 0; + struct iobuf *iobuf = NULL; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + + priv = this->private; + local = frame->local; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret, + local->op_errno); + return 0; + } - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + if (local->offset >= local->prebuf.ia_size) { + /* If the read is being performed past the end of the file, + * unwind the FOP with 0 bytes read as status. 
+ */ + struct iovec vec = { + 0, + }; - if (shard_is_appending_write(local)) { - local->delta_size = local->total_size; - } else if (local->offset + local->total_size > ctx->stat.ia_size) { - local->delta_size = (local->offset + local->total_size) - - ctx->stat.ia_size; - } else { - local->delta_size = 0; - } - ctx->stat.ia_size += (local->delta_size); - local->postbuf = ctx->stat; + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size); + if (!iobuf) + goto err; + + vec.iov_base = iobuf->ptr; + vec.iov_len = 0; + local->iobref = iobref_new(); + iobref_add(local->iobref, iobuf); + iobuf_unref(iobuf); + SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf, + local->iobref, NULL); return 0; -} + } -int -shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) -{ - int ret = -1; + local->first_block = get_lowest_block(local->offset, local->block_size); - LOCK(&inode->lock); - { - ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); - } - UNLOCK(&inode->lock); + local->total_size = local->req_size; - return ret; -} + local->last_block = + get_highest_block(local->offset, local->total_size, local->block_size); -int -shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *pre, - struct iatt *post, dict_t *xdata) -{ - int call_count = 0; - fd_t *anon_fd = cookie; - shard_local_t *local = NULL; - glusterfs_fop_t fop = 0; + local->num_blocks = local->last_block - local->first_block + 1; + local->resolver_base_inode = local->loc.inode; - local = frame->local; - fop = local->fop; + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) + goto err; - LOCK(&frame->lock); - { - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } else { - local->written_size += op_ret; - GF_ATOMIC_ADD(local->delta_blocks, - post->ia_blocks - pre->ia_blocks); - 
local->delta_size += (post->ia_size - pre->ia_size); - shard_inode_ctx_set(local->fd->inode, this, post, 0, - SHARD_MASK_TIMES); - if (local->fd->inode != anon_fd->inode) - shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, - anon_fd->inode); - } - } - UNLOCK(&frame->lock); + iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size); + if (!iobuf) + goto err; - if (anon_fd) - fd_unref(anon_fd); + local->iobref = iobref_new(); + if (!local->iobref) { + iobuf_unref(iobuf); + goto err; + } - call_count = shard_call_count_return(frame); - if (call_count == 0) { - SHARD_UNSET_ROOT_FS_ID(frame, local); - if (local->op_ret < 0) { - shard_common_failure_unwind(fop, frame, local->op_ret, - local->op_errno); - } else { - shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); - local->hole_size = 0; - if (xdata) - local->xattr_rsp = dict_ref(xdata); - shard_update_file_size( - frame, this, local->fd, NULL, - shard_common_inode_write_post_update_size_handler); - } - } + if (iobref_add(local->iobref, iobuf) != 0) { + iobuf_unref(iobuf); + goto err; + } - return 0; + memset(iobuf->ptr, 0, local->total_size); + iobuf_unref(iobuf); + local->iobuf = iobuf; + + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + if (!local->dot_shard_loc.inode) { + ret = + shard_init_internal_dir_loc(this, local, SHARD_INTERNAL_DIR_DOT_SHARD); + if (ret) + goto err; + shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } else { + local->post_res_handler = shard_post_resolve_readv_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; +err: + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; +} + +int shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { + int ret = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = 
shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size = 0 means that the file was created before + * sharding was enabled on the volume. + */ + STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto err; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto err; + local->fd = fd_ref(fd); + local->block_size = block_size; + local->offset = offset; + local->req_size = size; + local->flags = flags; + local->fop = GF_FOP_READ; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_readv_handler); + return 0; +err: + shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM); + return 0; } -int -shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vec, int count, off_t shard_offset, - size_t size) -{ - shard_local_t *local = NULL; +int shard_common_inode_write_post_update_size_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - switch (local->fop) { - case GF_FOP_WRITE: - STACK_WIND_COOKIE( - frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset, - local->flags, local->iobref, local->xattr_req); - break; - case GF_FOP_FALLOCATE: - STACK_WIND_COOKIE( - frame, 
shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, local->flags, - shard_offset, size, local->xattr_req); - break; - case GF_FOP_ZEROFILL: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->zerofill, fd, - shard_offset, size, local->xattr_req); - break; - case GF_FOP_DISCARD: - STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, fd, - shard_offset, size, local->xattr_req); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", local->fop); - break; - } - return 0; + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + } else { + shard_common_inode_write_success_unwind(local->fop, frame, + local->written_size); + } + return 0; } -int -shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) -{ - int i = 0; - int count = 0; - int call_count = 0; - int last_block = 0; - uint32_t cur_block = 0; - fd_t *fd = NULL; - fd_t *anon_fd = NULL; - shard_local_t *local = NULL; - struct iovec *vec = NULL; - gf_boolean_t wind_failed = _gf_false; - gf_boolean_t odirect = _gf_false; - off_t orig_offset = 0; - off_t shard_offset = 0; - off_t vec_offset = 0; - size_t remaining_size = 0; - size_t shard_write_size = 0; - - local = frame->local; - fd = local->fd; - - orig_offset = local->offset; - remaining_size = local->total_size; - cur_block = local->first_block; - local->call_count = call_count = local->num_blocks; - last_block = local->last_block; - - SHARD_SET_ROOT_FS_ID(frame, local); - - if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC - " into " - "dict: %s", - uuid_utoa(fd->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - 
local->call_count = 1; - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, - ENOMEM, NULL, NULL, NULL); - return 0; - } +static gf_boolean_t shard_is_appending_write(shard_local_t *local) { + if (local->fop != GF_FOP_WRITE) + return _gf_false; + if (local->flags & O_APPEND) + return _gf_true; + if (local->fd->flags & O_APPEND) + return _gf_true; + return _gf_false; +} - if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) - odirect = _gf_true; +int __shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - while (cur_block <= last_block) { - if (wind_failed) { - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, - ENOMEM, NULL, NULL, NULL); - goto next; - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - shard_offset = orig_offset % local->block_size; - shard_write_size = local->block_size - shard_offset; - if (shard_write_size > remaining_size) - shard_write_size = remaining_size; - - remaining_size -= shard_write_size; - - if (local->fop == GF_FOP_WRITE) { - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, NULL); - - vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); - if (!vec) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE(vec); - shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, - -1, ENOMEM, NULL, NULL, NULL); - goto next; - } - count = iov_subset(local->vector, local->count, vec_offset, - vec_offset + shard_write_size, vec); - } + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - if (cur_block == 0) { - anon_fd = fd_ref(fd); - } else { - anon_fd = fd_anonymous(local->inode_list[i]); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - wind_failed = _gf_true; - GF_FREE(vec); - shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, - this, -1, 
ENOMEM, NULL, NULL, - NULL); - goto next; - } - - if (local->fop == GF_FOP_WRITE) { - if (odirect) - local->flags = O_DIRECT; - else - local->flags = GF_ANON_FD_FLAGS; - } - } + if (shard_is_appending_write(local)) { + local->delta_size = local->total_size; + } else if (local->offset + local->total_size > ctx->stat.ia_size) { + local->delta_size = (local->offset + local->total_size) - ctx->stat.ia_size; + } else { + local->delta_size = 0; + } + ctx->stat.ia_size += (local->delta_size); + local->postbuf = ctx->stat; - shard_common_inode_write_wind(frame, this, anon_fd, vec, count, - shard_offset, shard_write_size); - if (vec) - vec_offset += shard_write_size; - orig_offset += shard_write_size; - GF_FREE(vec); - vec = NULL; - next: - cur_block++; - i++; - call_count--; - } - return 0; + return 0; } -int -shard_common_inode_write_post_mknod_handler(call_frame_t *frame, - xlator_t *this); +int shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { + int ret = -1; + + LOCK(&inode->lock); + { ret = __shard_get_delta_size_from_inode_ctx(local, inode, this); } + UNLOCK(&inode->lock); -int -shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, - xlator_t *this) -{ - shard_local_t *local = NULL; + return ret; +} - local = frame->local; +int shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { + int call_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + glusterfs_fop_t fop = 0; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } + local = frame->local; + fop = local->fop; - if (local->create_count) { - shard_common_resume_mknod(frame, this, - shard_common_inode_write_post_mknod_handler); + LOCK(&frame->lock); + { + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; } else { - 
shard_common_inode_write_do(frame, this); + local->written_size += op_ret; + GF_ATOMIC_ADD(local->delta_blocks, post->ia_blocks - pre->ia_blocks); + local->delta_size += (post->ia_size - pre->ia_size); + shard_inode_ctx_set(local->fd->inode, this, post, 0, SHARD_MASK_TIMES); + if (local->fd->inode != anon_fd->inode) + shard_inode_ctx_add_to_fsync_list(local->fd->inode, this, + anon_fd->inode); + } + } + UNLOCK(&frame->lock); + + if (anon_fd) + fd_unref(anon_fd); + + call_count = shard_call_count_return(frame); + if (call_count == 0) { + SHARD_UNSET_ROOT_FS_ID(frame, local); + if (local->op_ret < 0) { + shard_common_failure_unwind(fop, frame, local->op_ret, local->op_errno); + } else { + shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this); + local->hole_size = 0; + if (xdata) + local->xattr_rsp = dict_ref(xdata); + shard_update_file_size(frame, this, local->fd, NULL, + shard_common_inode_write_post_update_size_handler); } + } - return 0; + return 0; } -int -shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; +int shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vec, int count, + off_t shard_offset, size_t size) { + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } + switch (local->fop) { + case GF_FOP_WRITE: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, + vec, count, shard_offset, local->flags, local->iobref, + local->xattr_req); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fallocate, fd, + local->flags, shard_offset, size, local->xattr_req); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_COOKIE(frame, 
shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->zerofill, fd, + shard_offset, size, local->xattr_req); + break; + case GF_FOP_DISCARD: + STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->discard, fd, + shard_offset, size, local->xattr_req); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", local->fop); + break; + } + return 0; +} + +int shard_common_inode_write_do(call_frame_t *frame, xlator_t *this) { + int i = 0; + int count = 0; + int call_count = 0; + int last_block = 0; + uint32_t cur_block = 0; + fd_t *fd = NULL; + fd_t *anon_fd = NULL; + shard_local_t *local = NULL; + struct iovec *vec = NULL; + gf_boolean_t wind_failed = _gf_false; + gf_boolean_t odirect = _gf_false; + off_t orig_offset = 0; + off_t shard_offset = 0; + off_t vec_offset = 0; + size_t remaining_size = 0; + size_t shard_write_size = 0; + + local = frame->local; + fd = local->fd; + + orig_offset = local->offset; + remaining_size = local->total_size; + cur_block = local->first_block; + local->call_count = call_count = local->num_blocks; + last_block = local->last_block; + + SHARD_SET_ROOT_FS_ID(frame, local); + + if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC " into " + "dict: %s", + uuid_utoa(fd->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + local->call_count = 1; + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, + NULL, NULL, NULL); + return 0; + } - if (!local->eexist_count) { - shard_common_inode_write_do(frame, this); - } else { - local->call_count = local->eexist_count; - shard_common_lookup_shards( - frame, this, local->loc.inode, - shard_common_inode_write_post_lookup_shards_handler); + if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE)) + odirect 
= _gf_true; + + while (cur_block <= last_block) { + if (wind_failed) { + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, + NULL, NULL, NULL); + goto next; } - return 0; -} + shard_offset = orig_offset % local->block_size; + shard_write_size = local->block_size - shard_offset; + if (shard_write_size > remaining_size) + shard_write_size = remaining_size; -int -shard_common_inode_write_post_resolve_handler(call_frame_t *frame, - xlator_t *this) -{ - shard_local_t *local = NULL; + remaining_size -= shard_write_size; - local = frame->local; + if (local->fop == GF_FOP_WRITE) { + count = iov_subset(local->vector, local->count, vec_offset, + vec_offset + shard_write_size, NULL); - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; + vec = GF_CALLOC(count, sizeof(struct iovec), gf_shard_mt_iovec); + if (!vec) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + GF_FREE(vec); + shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } + count = iov_subset(local->vector, local->count, vec_offset, + vec_offset + shard_write_size, vec); } - if (local->call_count) { - shard_common_lookup_shards( - frame, this, local->resolver_base_inode, - shard_common_inode_write_post_lookup_shards_handler); + if (cur_block == 0) { + anon_fd = fd_ref(fd); } else { - shard_common_inode_write_do(frame, this); - } + anon_fd = fd_anonymous(local->inode_list[i]); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + wind_failed = _gf_true; + GF_FREE(vec); + shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd, this, -1, + ENOMEM, NULL, NULL, NULL); + goto next; + } - return 0; + if (local->fop == GF_FOP_WRITE) { + if (odirect) + local->flags = O_DIRECT; + else + local->flags = GF_ANON_FD_FLAGS; + } + } + + shard_common_inode_write_wind(frame, this, anon_fd, vec, count, + shard_offset, 
shard_write_size); + if (vec) + vec_offset += shard_write_size; + orig_offset += shard_write_size; + GF_FREE(vec); + vec = NULL; + next: + cur_block++; + i++; + call_count--; + } + return 0; } -int -shard_common_inode_write_post_lookup_handler(call_frame_t *frame, - xlator_t *this) -{ - shard_local_t *local = frame->local; - shard_priv_t *priv = this->private; - - if (local->op_ret < 0) { - shard_common_failure_unwind(local->fop, frame, local->op_ret, - local->op_errno); - return 0; - } - - local->postbuf = local->prebuf; - - /*Adjust offset to EOF so that correct shard is chosen for append*/ - if (shard_is_appending_write(local)) - local->offset = local->prebuf.ia_size; - - local->first_block = get_lowest_block(local->offset, local->block_size); - local->last_block = get_highest_block(local->offset, local->total_size, - local->block_size); - local->num_blocks = local->last_block - local->first_block + 1; - local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *), - gf_shard_mt_inode_list); - if (!local->inode_list) { - shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); - return 0; - } +int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, + xlator_t *this); - gf_msg_trace(this->name, 0, - "%s: gfid=%s first_block=%" PRIu32 - " " - "last_block=%" PRIu32 " num_blocks=%" PRIu32 " offset=%" PRId64 - " total_size=%zu flags=%" PRId32 "", - gf_fop_list[local->fop], - uuid_utoa(local->resolver_base_inode->gfid), - local->first_block, local->last_block, local->num_blocks, - local->offset, local->total_size, local->flags); +int shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); + local = frame->local; - if (!local->dot_shard_loc.inode) { - /*change handler*/ - shard_mkdir_internal_dir(frame, this, - shard_common_inode_write_post_resolve_handler, - SHARD_INTERNAL_DIR_DOT_SHARD); - } else { - 
/*change handler*/ - local->post_res_handler = shard_common_inode_write_post_resolve_handler; - shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; -} - -int -shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - inode_t *link_inode = NULL; - shard_local_t *local = NULL; - shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + } - local = frame->local; + if (local->create_count) { + shard_common_resume_mknod(frame, this, + shard_common_inode_write_post_mknod_handler); + } else { + shard_common_inode_write_do(frame, this); + } - SHARD_UNSET_ROOT_FS_ID(frame, local); + return 0; +} - if (op_ret == -1) { - if (op_errno != EEXIST) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } else { - gf_msg_debug(this->name, 0, - "mkdir on %s failed " - "with EEXIST. 
Attempting lookup now", - shard_internal_dir_string(type)); - shard_lookup_internal_dir(frame, this, local->post_res_handler, - type); - return 0; - } - } +int shard_common_inode_write_post_mknod_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - link_inode = shard_link_internal_dir_inode(local, inode, buf, type); - if (link_inode != inode) { - shard_refresh_internal_dir(frame, this, type); - } else { - shard_inode_ctx_mark_dir_refreshed(link_inode, this); - shard_common_resolve_shards(frame, this, local->post_res_handler); - } - return 0; -unwind: - shard_common_resolve_shards(frame, this, local->post_res_handler); - return 0; -} + local = frame->local; -int -shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, - shard_post_resolve_fop_handler_t handler, - shard_internal_dir_type_t type) -{ - int ret = -1; - shard_local_t *local = NULL; - shard_priv_t *priv = NULL; - dict_t *xattr_req = NULL; - uuid_t *gfid = NULL; - loc_t *loc = NULL; - gf_boolean_t free_gfid = _gf_true; - - local = frame->local; - priv = this->private; - - local->post_res_handler = handler; - gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); - if (!gfid) - goto err; - - switch (type) { - case SHARD_INTERNAL_DIR_DOT_SHARD: - gf_uuid_copy(*gfid, priv->dot_shard_gfid); - loc = &local->dot_shard_loc; - break; - case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: - gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); - loc = &local->dot_shard_rm_loc; - break; - default: - bzero(*gfid, sizeof(uuid_t)); - break; - } + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); + return 0; + } - xattr_req = dict_new(); - if (!xattr_req) - goto err; + if (!local->eexist_count) { + shard_common_inode_write_do(frame, this); + } else { + local->call_count = local->eexist_count; + shard_common_lookup_shards( + frame, this, local->loc.inode, + shard_common_inode_write_post_lookup_shards_handler); + } - ret = 
shard_init_internal_dir_loc(this, local, type); - if (ret) - goto err; + return 0; +} - ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, - "Failed to set gfid-req for %s", - shard_internal_dir_string(type)); - goto err; - } else { - free_gfid = _gf_false; - } +int shard_common_inode_write_post_resolve_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = NULL; - SHARD_SET_ROOT_FS_ID(frame, local); + local = frame->local; - STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, - 0755, 0, xattr_req); - dict_unref(xattr_req); + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; + } -err: - if (xattr_req) - dict_unref(xattr_req); - local->op_ret = -1; - local->op_errno = ENOMEM; - if (free_gfid) - GF_FREE(gfid); - handler(frame, this); - return 0; -} + if (local->call_count) { + shard_common_lookup_shards( + frame, this, local->resolver_base_inode, + shard_common_inode_write_post_lookup_shards_handler); + } else { + shard_common_inode_write_do(frame, this); + } -int -shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - /* To-Do: Wind flush on all shards of the file */ - SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); - return 0; + return 0; } -int -shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd, xdata); +int shard_common_inode_write_post_lookup_handler(call_frame_t *frame, + xlator_t *this) { + shard_local_t *local = frame->local; + shard_priv_t *priv = this->private; + + if (local->op_ret < 0) { + shard_common_failure_unwind(local->fop, frame, local->op_ret, + local->op_errno); return 0; -} + } -int 
-__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) -{ - int ret = -1; - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; + local->postbuf = local->prebuf; - ret = __inode_ctx_get(inode, this, &ctx_uint); - if (ret < 0) - return ret; + /*Adjust offset to EOF so that correct shard is chosen for append*/ + if (shard_is_appending_write(local)) + local->offset = local->prebuf.ia_size; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + local->first_block = get_lowest_block(local->offset, local->block_size); + local->last_block = + get_highest_block(local->offset, local->total_size, local->block_size); + local->num_blocks = local->last_block - local->first_block + 1; + local->inode_list = + GF_CALLOC(local->num_blocks, sizeof(inode_t *), gf_shard_mt_inode_list); + if (!local->inode_list) { + shard_common_failure_unwind(local->fop, frame, -1, ENOMEM); + return 0; + } - local->postbuf.ia_ctime = ctx->stat.ia_ctime; - local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; - local->postbuf.ia_atime = ctx->stat.ia_atime; - local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; - local->postbuf.ia_mtime = ctx->stat.ia_mtime; - local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + gf_msg_trace( + this->name, 0, "%s: gfid=%s first_block=%" PRIu32 " " + "last_block=%" PRIu32 " num_blocks=%" PRIu32 + " offset=%" PRId64 " total_size=%zu flags=%" PRId32 "", + gf_fop_list[local->fop], uuid_utoa(local->resolver_base_inode->gfid), + local->first_block, local->last_block, local->num_blocks, local->offset, + local->total_size, local->flags); - return 0; -} + local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid); -int -shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, - xlator_t *this) -{ - int ret = 0; + if (!local->dot_shard_loc.inode) { + /*change handler*/ + shard_mkdir_internal_dir(frame, this, + shard_common_inode_write_post_resolve_handler, + SHARD_INTERNAL_DIR_DOT_SHARD); + } 
else { + /*change handler*/ + local->post_res_handler = shard_common_inode_write_post_resolve_handler; + shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD); + } + return 0; +} - LOCK(&inode->lock); - { - ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); - } - UNLOCK(&inode->lock); +int shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { + inode_t *link_inode = NULL; + shard_local_t *local = NULL; + shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie; + + local = frame->local; + + SHARD_UNSET_ROOT_FS_ID(frame, local); + + if (op_ret == -1) { + if (op_errno != EEXIST) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } else { + gf_msg_debug(this->name, 0, "mkdir on %s failed " + "with EEXIST. Attempting lookup now", + shard_internal_dir_string(type)); + shard_lookup_internal_dir(frame, this, local->post_res_handler, type); + return 0; + } + } + + link_inode = shard_link_internal_dir_inode(local, inode, buf, type); + if (link_inode != inode) { + shard_refresh_internal_dir(frame, this, type); + } else { + shard_inode_ctx_mark_dir_refreshed(link_inode, this); + shard_common_resolve_shards(frame, this, local->post_res_handler); + } + return 0; +unwind: + shard_common_resolve_shards(frame, this, local->post_res_handler); + return 0; +} + +int shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this, + shard_post_resolve_fop_handler_t handler, + shard_internal_dir_type_t type) { + int ret = -1; + shard_local_t *local = NULL; + shard_priv_t *priv = NULL; + dict_t *xattr_req = NULL; + uuid_t *gfid = NULL; + loc_t *loc = NULL; + gf_boolean_t free_gfid = _gf_true; + + local = frame->local; + priv = this->private; + + local->post_res_handler = handler; + gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t); + if (!gfid) + goto err; + 
+ switch (type) { + case SHARD_INTERNAL_DIR_DOT_SHARD: + gf_uuid_copy(*gfid, priv->dot_shard_gfid); + loc = &local->dot_shard_loc; + break; + case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME: + gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid); + loc = &local->dot_shard_rm_loc; + break; + default: + bzero(*gfid, sizeof(uuid_t)); + break; + } + + xattr_req = dict_new(); + if (!xattr_req) + goto err; + + ret = shard_init_internal_dir_loc(this, local, type); + if (ret) + goto err; + + ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED, + "Failed to set gfid-req for %s", shard_internal_dir_string(type)); + goto err; + } else { + free_gfid = _gf_false; + } + + SHARD_SET_ROOT_FS_ID(frame, local); + + STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc, + 0755, 0, xattr_req); + dict_unref(xattr_req); + return 0; - return ret; +err: + if (xattr_req) + dict_unref(xattr_req); + local->op_ret = -1; + local->op_errno = ENOMEM; + if (free_gfid) + GF_FREE(gfid); + handler(frame, this); + return 0; } -int -shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - int call_count = 0; - uint64_t fsync_count = 0; - fd_t *anon_fd = cookie; - shard_local_t *local = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *base_ictx = NULL; - inode_t *base_inode = NULL; - gf_boolean_t unref_shard_inode = _gf_false; +int shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { + /* To-Do: Wind flush on all shards of the file */ + SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata); + return 0; +} - local = frame->local; - base_inode = local->fd->inode; +int shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { + 
STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; +} - if (local->op_ret < 0) - goto out; +int __shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { + int ret = -1; + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; - LOCK(&frame->lock); - { - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - UNLOCK(&frame->lock); - goto out; - } - shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, - SHARD_MASK_TIMES); - } - UNLOCK(&frame->lock); - fd_ctx_get(anon_fd, this, &fsync_count); -out: - if (anon_fd && (base_inode != anon_fd->inode)) { - LOCK(&base_inode->lock); - LOCK(&anon_fd->inode->lock); - { - __shard_inode_ctx_get(anon_fd->inode, this, &ctx); - __shard_inode_ctx_get(base_inode, this, &base_ictx); - if (op_ret == 0) - ctx->fsync_needed -= fsync_count; - GF_ASSERT(ctx->fsync_needed >= 0); - if (ctx->fsync_needed != 0) { - list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); - base_ictx->fsync_count++; - } else { - unref_shard_inode = _gf_true; - } - } - UNLOCK(&anon_fd->inode->lock); - UNLOCK(&base_inode->lock); - } + ret = __inode_ctx_get(inode, this, &ctx_uint); + if (ret < 0) + return ret; - if (unref_shard_inode) - inode_unref(anon_fd->inode); - if (anon_fd) - fd_unref(anon_fd); + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - call_count = shard_call_count_return(frame); - if (call_count != 0) - return 0; + local->postbuf.ia_ctime = ctx->stat.ia_ctime; + local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; + local->postbuf.ia_atime = ctx->stat.ia_atime; + local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; + local->postbuf.ia_mtime = ctx->stat.ia_mtime; + local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, - local->op_errno); - } else { - shard_get_timestamps_from_inode_ctx(local, base_inode, this); - 
SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } - return 0; + return 0; } -int -shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) -{ - int ret = 0; - int call_count = 0; - int fsync_count = 0; - fd_t *anon_fd = NULL; - inode_t *base_inode = NULL; - shard_local_t *local = NULL; - shard_inode_ctx_t *ctx = NULL; - shard_inode_ctx_t *iter = NULL; - struct list_head copy = { - 0, - }; - shard_inode_ctx_t *tmp = NULL; +int shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode, + xlator_t *this) { + int ret = 0; - local = frame->local; - base_inode = local->fd->inode; - local->postbuf = local->prebuf; - INIT_LIST_HEAD(©); + LOCK(&inode->lock); + { ret = __shard_get_timestamps_from_inode_ctx(local, inode, this); } + UNLOCK(&inode->lock); - if (local->op_ret < 0) { - shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, - local->op_errno); - return 0; - } + return ret; +} +int shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { + int call_count = 0; + uint64_t fsync_count = 0; + fd_t *anon_fd = cookie; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *base_ictx = NULL; + inode_t *base_inode = NULL; + gf_boolean_t unref_shard_inode = _gf_false; + + local = frame->local; + base_inode = local->fd->inode; + + if (local->op_ret < 0) + goto out; + + LOCK(&frame->lock); + { + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + UNLOCK(&frame->lock); + goto out; + } + shard_inode_ctx_set(local->fd->inode, this, postbuf, 0, SHARD_MASK_TIMES); + } + UNLOCK(&frame->lock); + fd_ctx_get(anon_fd, this, &fsync_count); +out: + if (anon_fd && (base_inode != anon_fd->inode)) { LOCK(&base_inode->lock); + LOCK(&anon_fd->inode->lock); { - __shard_inode_ctx_get(base_inode, this, &ctx); - 
list_splice_init(&ctx->to_fsync_list, ©); - call_count = ctx->fsync_count; - ctx->fsync_count = 0; - } + __shard_inode_ctx_get(anon_fd->inode, this, &ctx); + __shard_inode_ctx_get(base_inode, this, &base_ictx); + if (op_ret == 0) + ctx->fsync_needed -= fsync_count; + GF_ASSERT(ctx->fsync_needed >= 0); + if (ctx->fsync_needed != 0) { + list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list); + base_ictx->fsync_count++; + } else { + unref_shard_inode = _gf_true; + } + } + UNLOCK(&anon_fd->inode->lock); UNLOCK(&base_inode->lock); + } + + if (unref_shard_inode) + inode_unref(anon_fd->inode); + if (anon_fd) + fd_unref(anon_fd); + + call_count = shard_call_count_return(frame); + if (call_count != 0) + return 0; - local->call_count = ++call_count; + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + } else { + shard_get_timestamps_from_inode_ctx(local, base_inode, this); + SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } + return 0; +} + +int shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this) { + int ret = 0; + int call_count = 0; + int fsync_count = 0; + fd_t *anon_fd = NULL; + inode_t *base_inode = NULL; + shard_local_t *local = NULL; + shard_inode_ctx_t *ctx = NULL; + shard_inode_ctx_t *iter = NULL; + struct list_head copy = { + 0, + }; + shard_inode_ctx_t *tmp = NULL; + + local = frame->local; + base_inode = local->fd->inode; + local->postbuf = local->prebuf; + INIT_LIST_HEAD(©); + + if (local->op_ret < 0) { + shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret, + local->op_errno); + return 0; + } + + LOCK(&base_inode->lock); + { + __shard_inode_ctx_get(base_inode, this, &ctx); + list_splice_init(&ctx->to_fsync_list, ©); + call_count = ctx->fsync_count; + ctx->fsync_count = 0; + } + UNLOCK(&base_inode->lock); + + local->call_count = ++call_count; + + /* Send fsync() on the base shard first */ + 
anon_fd = fd_ref(local->fd); + STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, + local->xattr_req); + call_count--; + anon_fd = NULL; + + list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) { + list_del_init(&iter->to_fsync_list); + fsync_count = 0; + shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); + GF_ASSERT(fsync_count > 0); + anon_fd = fd_anonymous(iter->inode); + if (!anon_fd) { + local->op_ret = -1; + local->op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, + "Failed to create " + "anon fd to fsync shard"); + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, + NULL, NULL, NULL); + continue; + } - /* Send fsync() on the base shard first */ - anon_fd = fd_ref(local->fd); + ret = fd_ctx_set(anon_fd, this, fsync_count); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, + "Failed to set fd " + "ctx for shard inode gfid=%s", + uuid_utoa(iter->inode->gfid)); + local->op_ret = -1; + local->op_errno = ENOMEM; + shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, ENOMEM, + NULL, NULL, NULL); + continue; + } STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync, local->xattr_req); call_count--; - anon_fd = NULL; - - list_for_each_entry_safe(iter, tmp, ©, to_fsync_list) - { - list_del_init(&iter->to_fsync_list); - fsync_count = 0; - shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count); - GF_ASSERT(fsync_count > 0); - anon_fd = fd_anonymous(iter->inode); - if (!anon_fd) { - local->op_ret = -1; - local->op_errno = ENOMEM; - gf_msg(this->name, GF_LOG_WARNING, ENOMEM, - SHARD_MSG_MEMALLOC_FAILED, - "Failed to create " - "anon fd to fsync shard"); - shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, - ENOMEM, NULL, NULL, NULL); - continue; - } - - ret = 
fd_ctx_set(anon_fd, this, fsync_count); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED, - "Failed to set fd " - "ctx for shard inode gfid=%s", - uuid_utoa(iter->inode->gfid)); - local->op_ret = -1; - local->op_errno = ENOMEM; - shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1, - ENOMEM, NULL, NULL, NULL); - continue; - } - STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, - anon_fd, local->datasync, local->xattr_req); - call_count--; - } + } - return 0; + return 0; } -int -shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, - dict_t *xdata) -{ - int ret = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { + int ret = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto err; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); - return 0; - } + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!this->itable) + this->itable = fd->inode->table; - local = mem_get0(this->local_pool); - if 
(!local) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - frame->local = local; + frame->local = local; - local->fd = fd_ref(fd); - local->fop = GF_FOP_FSYNC; - local->datasync = datasync; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; + local->fd = fd_ref(fd); + local->fop = GF_FOP_FSYNC; + local->datasync = datasync; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); - shard_lookup_base_file(frame, this, &local->loc, - shard_post_lookup_fsync_handler); - return 0; + shard_lookup_base_file(frame, this, &local->loc, + shard_post_lookup_fsync_handler); + return 0; err: - shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM); + return 0; } -int -shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) -{ - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; +int shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, + gf_dirent_t *orig_entries, dict_t *xdata) { + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; - local = frame->local; + local = frame->local; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) - { - list_del_init(&entry->list); - list_add_tail(&entry->list, &local->entries_head.list); + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - 
if (!entry->dict) - continue; + if (!entry->dict) + continue; - if (IA_ISDIR(entry->d_stat.ia_type)) - continue; + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) - shard_modify_size_and_block_count(&entry->d_stat, entry->dict); - if (!entry->inode) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE)) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + if (!entry->inode) + continue; - shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); - } - local->op_ret += op_ret; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } + local->op_ret += op_ret; unwind: - if (local->fop == GF_FOP_READDIR) - SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, - &local->entries_head, xdata); - else - SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, - &local->entries_head, xdata); - return 0; + if (local->fop == GF_FOP_READDIR) + SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, + &local->entries_head, xdata); + else + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, + xdata); + return 0; } -int32_t -shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries, - dict_t *xdata) -{ - fd_t *fd = NULL; - gf_dirent_t *entry = NULL; - gf_dirent_t *tmp = NULL; - shard_local_t *local = NULL; - gf_boolean_t last_entry = _gf_false; +int32_t shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *orig_entries, dict_t *xdata) { + fd_t *fd = NULL; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + shard_local_t *local = NULL; + gf_boolean_t last_entry = _gf_false; - local = frame->local; - fd = local->fd; + local = frame->local; + fd = local->fd; - if (op_ret < 0) - goto unwind; + if (op_ret < 0) + goto unwind; - list_for_each_entry_safe(entry, tmp, 
(&orig_entries->list), list) - { - if (last_entry) - last_entry = _gf_false; - - if (__is_root_gfid(fd->inode->gfid) && - !(strcmp(entry->d_name, GF_SHARD_DIR))) { - local->offset = entry->d_off; - op_ret--; - last_entry = _gf_true; - continue; - } + list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list) { + if (last_entry) + last_entry = _gf_false; - list_del_init(&entry->list); - list_add_tail(&entry->list, &local->entries_head.list); + if (__is_root_gfid(fd->inode->gfid) && + !(strcmp(entry->d_name, GF_SHARD_DIR))) { + local->offset = entry->d_off; + op_ret--; + last_entry = _gf_true; + continue; + } - if (!entry->dict) - continue; + list_del_init(&entry->list); + list_add_tail(&entry->list, &local->entries_head.list); - if (IA_ISDIR(entry->d_stat.ia_type)) - continue; + if (!entry->dict) + continue; - if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && - frame->root->pid != GF_CLIENT_PID_GSYNCD) - shard_modify_size_and_block_count(&entry->d_stat, entry->dict); + if (IA_ISDIR(entry->d_stat.ia_type)) + continue; - if (!entry->inode) - continue; + if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) && + frame->root->pid != GF_CLIENT_PID_GSYNCD) + shard_modify_size_and_block_count(&entry->d_stat, entry->dict); - shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); - } + if (!entry->inode) + continue; - local->op_ret = op_ret; + shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat); + } - if (last_entry) { - if (local->fop == GF_FOP_READDIR) - STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir, - local->fd, local->readdir_size, local->offset, - local->xattr_req); - else - STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, - local->fd, local->readdir_size, local->offset, - local->xattr_req); - return 0; - } + local->op_ret = op_ret; -unwind: + if (last_entry) { if (local->fop == GF_FOP_READDIR) - 
SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, - &local->entries_head, xdata); + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, local->fd, + local->readdir_size, local->offset, local->xattr_req); else - SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, - &local->entries_head, xdata); + STACK_WIND(frame, shard_readdir_past_dot_shard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, local->fd, + local->readdir_size, local->offset, local->xattr_req); return 0; -} + } -int -shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, int whichop, dict_t *xdata) -{ - int ret = 0; - shard_local_t *local = NULL; - - local = mem_get0(this->local_pool); - if (!local) { - goto err; +unwind: + if (local->fop == GF_FOP_READDIR) + SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno, &local->entries_head, + xdata); + else + SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &local->entries_head, + xdata); + return 0; +} + +int shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, int whichop, dict_t *xdata) { + int ret = 0; + shard_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (!local) { + goto err; + } + + frame->local = local; + + local->fd = fd_ref(fd); + local->fop = whichop; + local->readdir_size = size; + INIT_LIST_HEAD(&local->entries_head.list); + local->list_inited = _gf_true; + + if (whichop == GF_FOP_READDIR) { + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); + } else { + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); + ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to set " + "dict value: key:%s, directory gfid=%s", + GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); + goto err; } - frame->local = local; - - local->fd = fd_ref(fd); - local->fop = whichop; - local->readdir_size = size; - INIT_LIST_HEAD(&local->entries_head.list); - local->list_inited = _gf_true; - - if (whichop == GF_FOP_READDIR) { - STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata); - } else { - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); - ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0); - if (ret) { - gf_log(this->name, GF_LOG_WARNING, - "Failed to set " - "dict value: key:%s, directory gfid=%s", - GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid)); - goto err; - } - - STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, offset, - local->xattr_req); - } + STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, + local->xattr_req); + } - return 0; + return 0; err: - STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL); + return 0; } -int32_t -shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); - return 0; +int32_t shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata); + 
return 0; } -int32_t -shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, dict_t *xdata) -{ - shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); - return 0; +int32_t shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { + shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata); + return 0; } -int32_t -shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { + int op_errno = EINVAL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + } - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); - } + if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); + } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_REMOVEXATTR, frame, -1, op_errno); + return 0; } -int32_t -shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) { + int op_errno = EINVAL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - 
GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, out); + } - if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); - } + if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE); + } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_FREMOVEXATTR, frame, -1, op_errno); + return 0; } -int32_t -shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - if (op_ret < 0) - goto unwind; +int32_t shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { + if (op_ret < 0) + goto unwind; - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); - } + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } -int32_t -shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, - dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, 
dict_t *xdata) { + int op_errno = EINVAL; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && - (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { - op_errno = ENODATA; - goto out; - } + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) { + op_errno = ENODATA; + goto out; + } - STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); - return 0; + STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno); + return 0; } -int32_t -shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, - dict_t *xdata) -{ - if (op_ret < 0) - goto unwind; +int32_t shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { + if (op_ret < 0) + goto unwind; - if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { - dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); - dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); - } + if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) { + dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE); + dict_del(dict, GF_XATTR_SHARD_FILE_SIZE); + } unwind: - SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; + SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; } -int32_t -shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { + int op_errno = EINVAL; - if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && - 
(!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { - op_errno = ENODATA; - goto out; - } + if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) && + (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) { + op_errno = ENODATA; + goto out; + } - STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); - return 0; + STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno); + return 0; } -int32_t -shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int32_t flags, dict_t *xdata) { + int op_errno = EINVAL; - if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_FSETXATTR, frame, -1, op_errno); + return 0; } -int32_t -shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - int op_errno = EINVAL; +int32_t shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) { + int op_errno = EINVAL; - if (frame->root->pid != 
GF_CLIENT_PID_GSYNCD) { - GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); - } + if (frame->root->pid != GF_CLIENT_PID_GSYNCD) { + GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, out); + } - STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); - return 0; + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); - return 0; + shard_common_failure_unwind(GF_FOP_SETXATTR, frame, -1, op_errno); + return 0; } -int -shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (local->fop == GF_FOP_SETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } else if (local->fop == GF_FOP_FSETATTR) { - if (local->op_ret >= 0) - shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, - SHARD_LOOKUP_MASK); - SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, - &local->prebuf, &local->postbuf, local->xattr_rsp); - } - - return 0; -} +int shard_post_setattr_handler(call_frame_t *frame, xlator_t *this) { + shard_local_t *local = NULL; -int -shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - shard_local_t *local = NULL; - - local = frame->local; - - if (op_ret < 0) { - local->op_ret = op_ret; - local->op_errno = op_errno; - goto unwind; - } + local = frame->local; - local->prebuf = *prebuf; - if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { - local->op_ret = -1; - local->op_errno = EINVAL; - goto unwind; - } - if (xdata) - 
local->xattr_rsp = dict_ref(xdata); - local->postbuf = *postbuf; - local->postbuf.ia_size = local->prebuf.ia_size; - local->postbuf.ia_blocks = local->prebuf.ia_blocks; + if (local->fop == GF_FOP_SETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } else if (local->fop == GF_FOP_FSETATTR) { + if (local->op_ret >= 0) + shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0, + SHARD_LOOKUP_MASK); + SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno, + &local->prebuf, &local->postbuf, local->xattr_rsp); + } -unwind: - local->handler(frame, this); - return 0; + return 0; } -int -shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { - STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; - } - - ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa(loc->inode->gfid)); - goto err; - } - - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); - return 0; - } - - local = mem_get0(this->local_pool); - if (!local) - goto err; +int shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { + shard_local_t *local = NULL; - frame->local = local; + local = frame->local; 
- local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_SETATTR; - loc_copy(&local->loc, loc); + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unwind; + } - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, - local, err); + local->prebuf = *prebuf; + if (shard_modify_size_and_block_count(&local->prebuf, xdata)) { + local->op_ret = -1; + local->op_errno = EINVAL; + goto unwind; + } + if (xdata) + local->xattr_rsp = dict_ref(xdata); + local->postbuf = *postbuf; + local->postbuf.ia_size = local->prebuf.ia_size; + local->postbuf.ia_blocks = local->prebuf.ia_blocks; - STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, - local->xattr_req); - return 0; -err: - shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); - return 0; +unwind: + local->handler(frame, this); + return 0; } -int -shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int ret = -1; - uint64_t block_size = 0; - shard_local_t *local = NULL; - - if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { - STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; - } +int shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block size from inode ctx of %s", - uuid_utoa(fd->inode->gfid)); - goto err; - } + if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) { + STACK_WIND(frame, 
default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); - return 0; - } + ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(loc->inode->gfid)); + goto err; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; + } - local = mem_get0(this->local_pool); - if (!local) - goto err; + local = mem_get0(this->local_pool); + if (!local) + goto err; - frame->local = local; + frame->local = local; - local->handler = shard_post_setattr_handler; - local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto err; - local->fop = GF_FOP_FSETATTR; - local->fd = fd_ref(fd); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_SETATTR; + loc_copy(&local->loc, loc); - SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, - local, err); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid, + local, err); - STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, - local->xattr_req); - return 0; + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, + local->xattr_req); + return 0; err: - shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM); + return 0; } -int -shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, - glusterfs_fop_t fop, fd_t *fd, - struct iovec *vector, int32_t count, - off_t offset, uint32_t flags, size_t len, - struct iobref *iobref, dict_t *xdata) -{ - int ret = 0; - int i = 0; - uint64_t block_size = 0; - shard_local_t *local = NULL; +int shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { + int ret = -1; + uint64_t block_size = 0; + shard_local_t *local = NULL; - ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, - "Failed to get block " - "size for %s from its inode ctx", - uuid_utoa(fd->inode->gfid)); - goto out; - } + if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { - /* block_size = 0 means that the file was created before - * sharding was enabled on the volume. 
- */ - switch (fop) { - case GF_FOP_WRITE: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, - count, offset, flags, iobref, xdata); - break; - case GF_FOP_FALLOCATE: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fallocate, fd, flags, - offset, len, xdata); - break; - case GF_FOP_ZEROFILL: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->zerofill, fd, offset, - len, xdata); - break; - case GF_FOP_DISCARD: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->discard, fd, offset, - len, xdata); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, - "Invalid fop id = %d", fop); - break; - } - return 0; - } + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block size from inode ctx of %s", + uuid_utoa(fd->inode->gfid)); + goto err; + } - if (!this->itable) - this->itable = fd->inode->table; + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; + } - local = mem_get0(this->local_pool); - if (!local) - goto out; + if (!this->itable) + this->itable = fd->inode->table; - frame->local = local; + local = mem_get0(this->local_pool); + if (!local) + goto err; - ret = syncbarrier_init(&local->barrier); - if (ret) - goto out; - local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); - if (!local->xattr_req) - goto out; - - if (vector) { - local->vector = iov_dup(vector, count); - if (!local->vector) - goto out; - for (i = 0; i < count; i++) - local->total_size += vector[i].iov_len; - local->count = count; - } else { - local->total_size = len; - } + frame->local = local; - local->fop = fop; - local->offset = offset; - local->flags = flags; - if (iobref) - local->iobref = iobref_ref(iobref); - local->fd = fd_ref(fd); - local->block_size = block_size; - local->resolver_base_inode = local->fd->inode; - GF_ATOMIC_INIT(local->delta_blocks, 0); + local->handler = shard_post_setattr_handler; + local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto err; + local->fop = GF_FOP_FSETATTR; + local->fd = fd_ref(fd); - local->loc.inode = inode_ref(fd->inode); - gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid, + local, err); - shard_lookup_base_file(frame, this, &local->loc, - shard_common_inode_write_post_lookup_handler); - return 0; + STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, + local->xattr_req); + return 0; +err: + shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM); + return 0; +} + +int shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this, + glusterfs_fop_t fop, fd_t *fd, + struct iovec *vector, int32_t count, + off_t offset, uint32_t flags, size_t len, + struct iobref *iobref, dict_t *xdata) { + int ret = 0; + int i = 0; + uint64_t block_size = 0; + shard_local_t *local = NULL; + + ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED, + "Failed to get block " + "size for %s from its inode ctx", + uuid_utoa(fd->inode->gfid)); + goto out; + } + + if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { + /* block_size 
= 0 means that the file was created before + * sharding was enabled on the volume. + */ + switch (fop) { + case GF_FOP_WRITE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); + break; + case GF_FOP_FALLOCATE: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, flags, offset, + len, xdata); + break; + case GF_FOP_ZEROFILL: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, + xdata); + break; + case GF_FOP_DISCARD: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP, + "Invalid fop id = %d", fop); + break; + } + return 0; + } + + if (!this->itable) + this->itable = fd->inode->table; + + local = mem_get0(this->local_pool); + if (!local) + goto out; + + frame->local = local; + + ret = syncbarrier_init(&local->barrier); + if (ret) + goto out; + local->xattr_req = (xdata) ? 
dict_ref(xdata) : dict_new(); + if (!local->xattr_req) + goto out; + + if (vector) { + local->vector = iov_dup(vector, count); + if (!local->vector) + goto out; + for (i = 0; i < count; i++) + local->total_size += vector[i].iov_len; + local->count = count; + } else { + local->total_size = len; + } + + local->fop = fop; + local->offset = offset; + local->flags = flags; + if (iobref) + local->iobref = iobref_ref(iobref); + local->fd = fd_ref(fd); + local->block_size = block_size; + local->resolver_base_inode = local->fd->inode; + GF_ATOMIC_INIT(local->delta_blocks, 0); + + local->loc.inode = inode_ref(fd->inode); + gf_uuid_copy(local->loc.gfid, fd->inode->gfid); + + shard_lookup_base_file(frame, this, &local->loc, + shard_common_inode_write_post_lookup_handler); + return 0; out: - shard_common_failure_unwind(fop, frame, -1, ENOMEM); - return 0; + shard_common_failure_unwind(fop, frame, -1, ENOMEM); + return 0; } -int -shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, uint32_t flags, - struct iobref *iobref, dict_t *xdata) -{ - shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, - offset, flags, 0, iobref, xdata); - return 0; +int shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { + shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count, + offset, flags, 0, iobref, xdata); + return 0; } -int -shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t keep_size, off_t offset, size_t len, dict_t *xdata) -{ - if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) && - (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) - goto out; +int shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, + dict_t *xdata) { + if ((keep_size != 0) && (keep_size != 
FALLOC_FL_ZERO_RANGE) && + (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))) + goto out; - shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, - offset, keep_size, len, NULL, xdata); - return 0; + shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0, + offset, keep_size, len, NULL, xdata); + return 0; out: - shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); - return 0; + shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP); + return 0; } -int -shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) -{ - shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, - offset, 0, len, NULL, xdata); - return 0; +int shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) { + shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; } -int -shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, - offset, 0, len, NULL, xdata); - return 0; +int shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { + shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0, + offset, 0, len, NULL, xdata); + return 0; } -int32_t -shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) -{ - /* TBD */ - gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, - "seek called on %s.", uuid_utoa(fd->inode->gfid)); - shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); - return 0; +int32_t shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) { + /* TBD */ + gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED, + 
"seek called on %s.", uuid_utoa(fd->inode->gfid)); + shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP); + return 0; } -int32_t -mem_acct_init(xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; +int32_t mem_acct_init(xlator_t *this) { + int ret = -1; - ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, - "Memory accounting init" - "failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1); + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED, + "Memory accounting init" + "failed"); return ret; + } + + return ret; } -int -init(xlator_t *this) -{ - int ret = -1; - shard_priv_t *priv = NULL; +int init(xlator_t *this) { + int ret = -1; + shard_priv_t *priv = NULL; - if (!this) { - gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, - "this is NULL. init() failed"); - return -1; - } + if (!this) { + gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS, + "this is NULL. init() failed"); + return -1; + } - if (!this->parents) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "Dangling volume. Check volfile"); - goto out; - } + if (!this->parents) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "Dangling volume. Check volfile"); + goto out; + } - if (!this->children || this->children->next) { - gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, - "shard not configured with exactly one sub-volume. " - "Check volfile"); - goto out; - } + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE, + "shard not configured with exactly one sub-volume. 
" + "Check volfile"); + goto out; + } - priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); - if (!priv) - goto out; + priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t); + if (!priv) + goto out; - GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); + GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out); - GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); + GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out); - GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); + GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out); - this->local_pool = mem_pool_new(shard_local_t, 128); - if (!this->local_pool) { - ret = -1; - goto out; - } - gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); - gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); + this->local_pool = mem_pool_new(shard_local_t, 128); + if (!this->local_pool) { + ret = -1; + goto out; + } + gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid); + gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid); - this->private = priv; - LOCK_INIT(&priv->lock); - INIT_LIST_HEAD(&priv->ilist_head); - ret = 0; + this->private = priv; + LOCK_INIT(&priv->lock); + INIT_LIST_HEAD(&priv->ilist_head); + ret = 0; out: - if (ret) { - GF_FREE(priv); - mem_pool_destroy(this->local_pool); - } + if (ret) { + GF_FREE(priv); + mem_pool_destroy(this->local_pool); + } - return ret; + return ret; } -void -fini(xlator_t *this) -{ - shard_priv_t *priv = NULL; +void fini(xlator_t *this) { + shard_priv_t *priv = NULL; - GF_VALIDATE_OR_GOTO("shard", this, out); + GF_VALIDATE_OR_GOTO("shard", this, out); - mem_pool_destroy(this->local_pool); - this->local_pool = NULL; + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; - priv = this->private; - if (!priv) - goto out; + priv = this->private; + if (!priv) + goto out; - this->private = NULL; - LOCK_DESTROY(&priv->lock); - GF_FREE(priv); + 
this->private = NULL; + LOCK_DESTROY(&priv->lock); + GF_FREE(priv); out: - return; + return; } -int -reconfigure(xlator_t *this, dict_t *options) -{ - int ret = -1; - shard_priv_t *priv = NULL; +int reconfigure(xlator_t *this, dict_t *options) { + int ret = -1; + shard_priv_t *priv = NULL; - priv = this->private; + priv = this->private; - GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); + GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out); - GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, - uint32, out); - ret = 0; + GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options, uint32, + out); + ret = 0; out: - return ret; + return ret; } -int -shard_forget(xlator_t *this, inode_t *inode) -{ - uint64_t ctx_uint = 0; - shard_inode_ctx_t *ctx = NULL; - shard_priv_t *priv = NULL; +int shard_forget(xlator_t *this, inode_t *inode) { + uint64_t ctx_uint = 0; + shard_inode_ctx_t *ctx = NULL; + shard_priv_t *priv = NULL; - priv = this->private; - if (!priv) - return 0; + priv = this->private; + if (!priv) + return 0; - inode_ctx_del(inode, this, &ctx_uint); - if (!ctx_uint) - return 0; + inode_ctx_del(inode, this, &ctx_uint); + if (!ctx_uint) + return 0; - ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; + ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint; - /* When LRU limit reaches inode will be forcefully removed from the - * table, inode needs to be removed from LRU of shard as well. - */ - if (!list_empty(&ctx->ilist)) { - LOCK(&priv->lock); - { - list_del_init(&ctx->ilist); - priv->inode_count--; - } - UNLOCK(&priv->lock); + /* When LRU limit reaches inode will be forcefully removed from the + * table, inode needs to be removed from LRU of shard as well. 
+ */ + if (!list_empty(&ctx->ilist)) { + LOCK(&priv->lock); + { + list_del_init(&ctx->ilist); + priv->inode_count--; } - GF_FREE(ctx); + UNLOCK(&priv->lock); + } + GF_FREE(ctx); - return 0; + return 0; } -int -shard_release(xlator_t *this, fd_t *fd) -{ - /* TBD */ - return 0; +int shard_release(xlator_t *this, fd_t *fd) { + /* TBD */ + return 0; } -int -shard_priv_dump(xlator_t *this) -{ - shard_priv_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = { - 0, - }; - char *str = NULL; +int shard_priv_dump(xlator_t *this) { + shard_priv_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char *str = NULL; - priv = this->private; + priv = this->private; - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); - gf_proc_dump_add_section("%s", key_prefix); - str = gf_uint64_2human_readable(priv->block_size); - gf_proc_dump_write("shard-block-size", "%s", str); - gf_proc_dump_write("inode-count", "%d", priv->inode_count); - gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); - gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section("%s", key_prefix); + str = gf_uint64_2human_readable(priv->block_size); + gf_proc_dump_write("shard-block-size", "%s", str); + gf_proc_dump_write("inode-count", "%d", priv->inode_count); + gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head); + gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit); - GF_FREE(str); + GF_FREE(str); - return 0; + return 0; } -int -shard_releasedir(xlator_t *this, fd_t *fd) -{ - return 0; -} +int shard_releasedir(xlator_t *this, fd_t *fd) { return 0; } struct xlator_fops fops = { .lookup = shard_lookup, -- 1.8.3.1