cache.c

   1 /* cache.c: cache management
   2  *
   3  * Copyright (C) 2006-2014 cgit Development Team <cgit@lists.zx2c4.com>
   4  *
   5  * Licensed under GNU General Public License v2
   6  *   (see COPYING for full license text)
   7  *
   8  *
   9  * The cache is just a directory structure where each file is a cache slot,
  10  * and each filename is based on the hash of some key (e.g. the cgit url).
  11  * Each file contains the full key followed by the cached content for that
  12  * key.
  13  *
  14  */
  15
  16 #include "cgit.h"
  17 #include "cache.h"
  18 #include "html.h"
  19 #ifdef HAVE_LINUX_SENDFILE
  20 #include <sys/sendfile.h>
  21 #endif
  22
  23 #define CACHE_BUFSIZE (1024 * 4)
  24
/* State for one cache slot: the requested key, the cache file and lockfile
 * backing it, and a buffer holding the leading bytes of the cache file.
 */
struct cache_slot {
        const char *key;        /* key requested by the caller (NULL in cache_ls) */
        size_t keylen;          /* strlen(key), excluding the terminating NUL */
        int ttl;                /* lifetime in minutes; negative means never expires */
        cache_fill_fn fn;       /* callback that writes the content to stdout */
        int cache_fd;           /* descriptor of the open cache file (-1 once closed) */
        int lock_fd;            /* descriptor of the open lockfile (-1 once closed) */
        const char *cache_name; /* path of the cache file */
        const char *lock_name;  /* path of the lockfile */
        int match;              /* set by open_slot(): stored key equals `key` */
        struct stat cache_st;   /* stat info of the cache file being served */
        int bufsize;            /* number of bytes read into buf by open_slot() */
        char buf[CACHE_BUFSIZE]; /* start of the cache file; reused as copy buffer */
};
  39
  40 /* Open an existing cache slot and fill the cache buffer with
  41  * (part of) the content of the cache file. Return 0 on success
  42  * and errno otherwise.
  43  */
  44 static int open_slot(struct cache_slot *slot)
  45 {
  46         char *bufz;
  47         ssize_t bufkeylen = -1;
  48
  49         slot->cache_fd = open(slot->cache_name, O_RDONLY);
  50         if (slot->cache_fd == -1)
  51                 return errno;
  52
  53         if (fstat(slot->cache_fd, &slot->cache_st))
  54                 return errno;
  55
  56         slot->bufsize = xread(slot->cache_fd, slot->buf, sizeof(slot->buf));
  57         if (slot->bufsize < 0)
  58                 return errno;
  59
  60         bufz = memchr(slot->buf, 0, slot->bufsize);
  61         if (bufz)
  62                 bufkeylen = bufz - slot->buf;
  63
  64         if (slot->key)
  65                 slot->match = bufkeylen == slot->keylen &&
  66                     !memcmp(slot->key, slot->buf, bufkeylen + 1);
  67
  68         return 0;
  69 }
  70
  71 /* Close the active cache slot */
  72 static int close_slot(struct cache_slot *slot)
  73 {
  74         int err = 0;
  75         if (slot->cache_fd > 0) {
  76                 if (close(slot->cache_fd))
  77                         err = errno;
  78                 else
  79                         slot->cache_fd = -1;
  80         }
  81         return err;
  82 }
  83
  84 /* Print the content of the active cache slot (but skip the key). */
  85 static int print_slot(struct cache_slot *slot)
  86 {
  87 #ifdef HAVE_LINUX_SENDFILE
  88         off_t start_off;
  89         int ret;
  90
  91         start_off = slot->keylen + 1;
  92
  93         do {
  94                 ret = sendfile(STDOUT_FILENO, slot->cache_fd, &start_off,
  95                                 slot->cache_st.st_size - start_off);
  96                 if (ret < 0) {
  97                         if (errno == EAGAIN || errno == EINTR)
  98                                 continue;
  99                         return errno;
 100                 }
 101                 return 0;
 102         } while (1);
 103 #else
 104         ssize_t i, j;
 105
 106         i = lseek(slot->cache_fd, slot->keylen + 1, SEEK_SET);
 107         if (i != slot->keylen + 1)
 108                 return errno;
 109
 110         do {
 111                 i = j = xread(slot->cache_fd, slot->buf, sizeof(slot->buf));
 112                 if (i > 0)
 113                         j = xwrite(STDOUT_FILENO, slot->buf, i);
 114         } while (i > 0 && j == i);
 115
 116         if (i < 0 || j != i)
 117                 return errno;
 118         else
 119                 return 0;
 120 #endif
 121 }
 122
 123 /* Check if the slot has expired */
 124 static int is_expired(struct cache_slot *slot)
 125 {
 126         if (slot->ttl < 0)
 127                 return 0;
 128         else
 129                 return slot->cache_st.st_mtime + slot->ttl * 60 < time(NULL);
 130 }
 131
 132 /* Check if the slot has been modified since we opened it.
 133  * NB: If stat() fails, we pretend the file is modified.
 134  */
 135 static int is_modified(struct cache_slot *slot)
 136 {
 137         struct stat st;
 138
 139         if (stat(slot->cache_name, &st))
 140                 return 1;
 141         return (st.st_ino != slot->cache_st.st_ino ||
 142                 st.st_mtime != slot->cache_st.st_mtime ||
 143                 st.st_size != slot->cache_st.st_size);
 144 }
 145
 146 /* Close an open lockfile */
 147 static int close_lock(struct cache_slot *slot)
 148 {
 149         int err = 0;
 150         if (slot->lock_fd > 0) {
 151                 if (close(slot->lock_fd))
 152                         err = errno;
 153                 else
 154                         slot->lock_fd = -1;
 155         }
 156         return err;
 157 }
 158
 159 /* Create a lockfile used to store the generated content for a cache
 160  * slot, and write the slot key + \0 into it.
 161  * Returns 0 on success and errno otherwise.
 162  */
 163 static int lock_slot(struct cache_slot *slot)
 164 {
 165         struct flock lock = {
 166                 .l_type = F_WRLCK,
 167                 .l_whence = SEEK_SET,
 168                 .l_start = 0,
 169                 .l_len = 0,
 170         };
 171
 172         slot->lock_fd = open(slot->lock_name, O_RDWR | O_CREAT,
 173                              S_IRUSR | S_IWUSR);
 174         if (slot->lock_fd == -1)
 175                 return errno;
 176         if (fcntl(slot->lock_fd, F_SETLK, &lock) < 0) {
 177                 int saved_errno = errno;
 178                 close(slot->lock_fd);
 179                 slot->lock_fd = -1;
 180                 return saved_errno;
 181         }
 182         if (xwrite(slot->lock_fd, slot->key, slot->keylen + 1) < 0)
 183                 return errno;
 184         return 0;
 185 }
 186
 187 /* Release the current lockfile. If `replace_old_slot` is set the
 188  * lockfile replaces the old cache slot, otherwise the lockfile is
 189  * just deleted.
 190  */
 191 static int unlock_slot(struct cache_slot *slot, int replace_old_slot)
 192 {
 193         int err;
 194
 195         if (replace_old_slot)
 196                 err = rename(slot->lock_name, slot->cache_name);
 197         else
 198                 err = unlink(slot->lock_name);
 199
 200         if (err)
 201                 return errno;
 202
 203         return 0;
 204 }
 205
 206 /* Generate the content for the current cache slot by redirecting
 207  * stdout to the lock-fd and invoking the callback function
 208  */
 209 static int fill_slot(struct cache_slot *slot)
 210 {
 211         int tmp;
 212
 213         /* Preserve stdout */
 214         tmp = dup(STDOUT_FILENO);
 215         if (tmp == -1)
 216                 return errno;
 217
 218         /* Redirect stdout to lockfile */
 219         if (dup2(slot->lock_fd, STDOUT_FILENO) == -1) {
 220                 close(tmp);
 221                 return errno;
 222         }
 223
 224         /* Generate cache content */
 225         slot->fn();
 226
 227         /* Make sure any buffered data is flushed to the file */
 228         if (fflush(stdout)) {
 229                 close(tmp);
 230                 return errno;
 231         }
 232
 233         /* update stat info */
 234         if (fstat(slot->lock_fd, &slot->cache_st)) {
 235                 close(tmp);
 236                 return errno;
 237         }
 238
 239         /* Restore stdout */
 240         if (dup2(tmp, STDOUT_FILENO) == -1) {
 241                 close(tmp);
 242                 return errno;
 243         }
 244
 245         /* Close the temporary filedescriptor */
 246         if (close(tmp))
 247                 return errno;
 248
 249         return 0;
 250 }
 251
 252 /* Crude implementation of 32-bit FNV-1 hash algorithm,
 253  * see http://www.isthe.com/chongo/tech/comp/fnv/ for details
 254  * about the magic numbers.
 255  */
 256 #define FNV_OFFSET 0x811c9dc5
 257 #define FNV_PRIME  0x01000193
 258
 259 unsigned long hash_str(const char *str)
 260 {
 261         unsigned long h = FNV_OFFSET;
 262         unsigned char *s = (unsigned char *)str;
 263
 264         if (!s)
 265                 return h;
 266
 267         while (*s) {
 268                 h *= FNV_PRIME;
 269                 h ^= *s++;
 270         }
 271         return h;
 272 }
 273
 274 static int process_slot(struct cache_slot *slot)
 275 {
 276         int err;
 277
 278         err = open_slot(slot);
 279         if (!err && slot->match) {
 280                 if (is_expired(slot)) {
 281                         if (!lock_slot(slot)) {
 282                                 /* If the cachefile has been replaced between
 283                                  * `open_slot` and `lock_slot`, we'll just
 284                                  * serve the stale content from the original
 285                                  * cachefile. This way we avoid pruning the
 286                                  * newly generated slot. The same code-path
 287                                  * is chosen if fill_slot() fails for some
 288                                  * reason.
 289                                  *
 290                                  * TODO? check if the new slot contains the
 291                                  * same key as the old one, since we would
 292                                  * prefer to serve the newest content.
 293                                  * This will require us to open yet another
 294                                  * file-descriptor and read and compare the
 295                                  * key from the new file, so for now we're
 296                                  * lazy and just ignore the new file.
 297                                  */
 298                                 if (is_modified(slot) || fill_slot(slot)) {
 299                                         unlock_slot(slot, 0);
 300                                         close_lock(slot);
 301                                 } else {
 302                                         close_slot(slot);
 303                                         unlock_slot(slot, 1);
 304                                         slot->cache_fd = slot->lock_fd;
 305                                 }
 306                         }
 307                 }
 308                 if ((err = print_slot(slot)) != 0) {
 309                         cache_log("[cgit] error printing cache %s: %s (%d)\n",
 310                                   slot->cache_name,
 311                                   strerror(err),
 312                                   err);
 313                 }
 314                 close_slot(slot);
 315                 return err;
 316         }
 317
 318         /* If the cache slot does not exist (or its key doesn't match the
 319          * current key), lets try to create a new cache slot for this
 320          * request. If this fails (for whatever reason), lets just generate
 321          * the content without caching it and fool the caller to believe
 322          * everything worked out (but print a warning on stdout).
 323          */
 324
 325         close_slot(slot);
 326         if ((err = lock_slot(slot)) != 0) {
 327                 cache_log("[cgit] Unable to lock slot %s: %s (%d)\n",
 328                           slot->lock_name, strerror(err), err);
 329                 slot->fn();
 330                 return 0;
 331         }
 332
 333         if ((err = fill_slot(slot)) != 0) {
 334                 cache_log("[cgit] Unable to fill slot %s: %s (%d)\n",
 335                           slot->lock_name, strerror(err), err);
 336                 unlock_slot(slot, 0);
 337                 close_lock(slot);
 338                 slot->fn();
 339                 return 0;
 340         }
 341         // We've got a valid cache slot in the lock file, which
 342         // is about to replace the old cache slot. But if we
 343         // release the lockfile and then try to open the new cache
 344         // slot, we might get a race condition with a concurrent
 345         // writer for the same cache slot (with a different key).
 346         // Lets avoid such a race by just printing the content of
 347         // the lock file.
 348         slot->cache_fd = slot->lock_fd;
 349         unlock_slot(slot, 1);
 350         if ((err = print_slot(slot)) != 0) {
 351                 cache_log("[cgit] error printing cache %s: %s (%d)\n",
 352                           slot->cache_name,
 353                           strerror(err),
 354                           err);
 355         }
 356         close_slot(slot);
 357         return err;
 358 }
 359
 360 /* Print cached content to stdout, generate the content if necessary. */
 361 int cache_process(int size, const char *path, const char *key, int ttl,
 362                   cache_fill_fn fn)
 363 {
 364         unsigned long hash;
 365         int i;
 366         struct strbuf filename = STRBUF_INIT;
 367         struct strbuf lockname = STRBUF_INIT;
 368         struct cache_slot slot;
 369         int result;
 370
 371         /* If the cache is disabled, just generate the content */
 372         if (size <= 0 || ttl == 0) {
 373                 fn();
 374                 return 0;
 375         }
 376
 377         /* Verify input, calculate filenames */
 378         if (!path) {
 379                 cache_log("[cgit] Cache path not specified, caching is disabled\n");
 380                 fn();
 381                 return 0;
 382         }
 383         if (!key)
 384                 key = "";
 385         hash = hash_str(key) % size;
 386         strbuf_addstr(&filename, path);
 387         strbuf_ensure_end(&filename, '/');
 388         for (i = 0; i < 8; i++) {
 389                 strbuf_addf(&filename, "%x", (unsigned char)(hash & 0xf));
 390                 hash >>= 4;
 391         }
 392         strbuf_addbuf(&lockname, &filename);
 393         strbuf_addstr(&lockname, ".lock");
 394         slot.fn = fn;
 395         slot.ttl = ttl;
 396         slot.cache_name = filename.buf;
 397         slot.lock_name = lockname.buf;
 398         slot.key = key;
 399         slot.keylen = strlen(key);
 400         result = process_slot(&slot);
 401
 402         strbuf_release(&filename);
 403         strbuf_release(&lockname);
 404         return result;
 405 }
 406
 407 /* Return a strftime formatted date/time
 408  * NB: the result from this function is to shared memory
 409  */
 410 static char *sprintftime(const char *format, time_t time)
 411 {
 412         static char buf[64];
 413         struct tm *tm;
 414
 415         if (!time)
 416                 return NULL;
 417         tm = gmtime(&time);
 418         strftime(buf, sizeof(buf)-1, format, tm);
 419         return buf;
 420 }
 421
 422 int cache_ls(const char *path)
 423 {
 424         DIR *dir;
 425         struct dirent *ent;
 426         int err = 0;
 427         struct cache_slot slot = { NULL };
 428         struct strbuf fullname = STRBUF_INIT;
 429         size_t prefixlen;
 430
 431         if (!path) {
 432                 cache_log("[cgit] cache path not specified\n");
 433                 return -1;
 434         }
 435         dir = opendir(path);
 436         if (!dir) {
 437                 err = errno;
 438                 cache_log("[cgit] unable to open path %s: %s (%d)\n",
 439                           path, strerror(err), err);
 440                 return err;
 441         }
 442         strbuf_addstr(&fullname, path);
 443         strbuf_ensure_end(&fullname, '/');
 444         prefixlen = fullname.len;
 445         while ((ent = readdir(dir)) != NULL) {
 446                 if (strlen(ent->d_name) != 8)
 447                         continue;
 448                 strbuf_setlen(&fullname, prefixlen);
 449                 strbuf_addstr(&fullname, ent->d_name);
 450                 slot.cache_name = fullname.buf;
 451                 if ((err = open_slot(&slot)) != 0) {
 452                         cache_log("[cgit] unable to open path %s: %s (%d)\n",
 453                                   fullname.buf, strerror(err), err);
 454                         continue;
 455                 }
 456                 htmlf("%s %s %10"PRIuMAX" %s\n",
 457                       fullname.buf,
 458                       sprintftime("%Y-%m-%d %H:%M:%S",
 459                                   slot.cache_st.st_mtime),
 460                       (uintmax_t)slot.cache_st.st_size,
 461                       slot.buf);
 462                 close_slot(&slot);
 463         }
 464         closedir(dir);
 465         strbuf_release(&fullname);
 466         return 0;
 467 }
 468
 469 /* Print a message to stdout */
 470 void cache_log(const char *format, ...)
 471 {
 472         va_list args;
 473         va_start(args, format);
 474         vfprintf(stderr, format, args);
 475         va_end(args);
 476 }
 477