Update views on document save

CouchDB defaults to regenerating views the first time they are accessed. This behavior is preferable in most cases as it optimizes the resource utilization on the database server. On the other hand, in some situations the benefit of always having fast and up-to-date views far outweighs the cost of regenerating them every time the database server receives updates. This can be achieved by supplying an updater script that calls the views when needed.

Example using ruby

couch.ini

(0.8) Add the following line to the couch.ini file

DbUpdateNotificationProcess=/PATH/TO/view_updater.rb

(0.9+) Add the following section to the local.ini file:

[update_notification]
view_updater=/PATH/TO/view_updater.rb

view_updater.rb

The following script updates the views of a database once at least ten updates to that database are pending. It checks the pending counts at most once every second, so the views are not queried more often than that even when a lot of saves are performed.

#!/usr/bin/ruby

###################
# CONF            #
###################

# The smallest number of pending changed documents in a database before
# its views are updated
MIN_NUM_OF_CHANGED_DOCS = 10

# Base URL of the CouchDB server; the database name and the view path are
# appended to it when a view is queried
URL = "http://localhost:5984"

# Set the minimum pause between calls to the database
PAUSE = 1 # seconds

# One entry for each design document in each database:
# database name => { design document name => view name }
VIEWS = {"my_db"  => {"design_doc" => "view_name"}}

###################
# RUNTIME         #
###################

# Both worker loops keep going while this is true; the reader thread sets it
# to false once gets returns nil
run = true

# Number of pending changed documents, keyed by database name
number_of_changed_docs = {}

# The worker threads, joined at the end of the script
threads = []

# Updates the views
threads << Thread.new do

  while run do

    # Walk a snapshot of the keys: the reader thread may register a new
    # database while this pass is running, and Ruby raises when a key is
    # added to a hash that is being iterated.
    number_of_changed_docs.keys.each do |db_name|
      next if number_of_changed_docs[db_name] < MIN_NUM_OF_CHANGED_DOCS

      # Reset the value
      number_of_changed_docs[db_name] = 0

      # If there are views configured for the database, get them
      (VIEWS[db_name] || {}).each do |design, view|
        # limit=0 asks for no rows; the request is made only so that the
        # view gets called. The URL is quoted so that the shell does not
        # treat the "?" as a wildcard.
        `curl '#{URL}/#{db_name}/_design/#{design}/_view/#{view}?limit=0'`
      end
    end

    # Pause before starting over again
    sleep PAUSE

  end

end

# Receives the update notification from CouchDB
threads << Thread.new do

  while run do

    STDERR << "Waiting for input\n"
    update_call = gets

    # When CouchDB exits the script gets called with
    # a never ending series of nil
    if update_call.nil?
      run = false
    else

      # Get the database name out of the call data
      # The data looks something like this:
      # {"type":"updated","db":"DB_NAME"}\n
      # NOTE: \w+ only accepts names made of letters, digits and "_".
      # The name is later interpolated into a shell command by the updater
      # thread, so do not loosen this pattern without escaping it there.
      database_name = update_call[/\"db\":\"(\w+)\"/, 1]

      # Skip lines without a recognizable database name instead of
      # counting them under a nil key
      if database_name

        # Set to 0 if it hasn't been initialized before
        number_of_changed_docs[database_name] ||= 0

        # Add one pending changed document to the list of documents
        # in the DB
        number_of_changed_docs[database_name] += 1

      end

    end

  end

end

# Good bye
# Block the main thread until every worker thread has finished
threads.each { |thread| thread.join }

The view_updater.rb file itself has to be executable by the user that CouchDB runs as (for example, chmod 0700 if that user owns the file).

Example using Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Updater script to regenerate couchdb views on update.
"""

import logging
logging.basicConfig(level=logging.INFO)

import os
import re
import signal
import sys
import time
import urllib2

from threading import Thread

# Shared run flag: every loop in this script keeps going only while
# flags['is_running'] is true, and any of them may clear it to stop the rest.
flags = {'is_running': True}

# Pending change count per database name; incremented for each notification
# and removed again once the database's views have been queried.
changed_docs = dict()

class ViewUpdater(object):
    """Updates the views.

    A background thread polls the module-level ``changed_docs`` counters
    every ``PAUSE`` seconds. When a database has accumulated at least
    ``MIN_NUM_OF_CHANGED_DOCS`` changes, its counter is removed and every
    view listed for it in ``VIEWS`` is requested with ``limit=0``.
    """

    # The smallest amount of changed documents before the views are updated
    MIN_NUM_OF_CHANGED_DOCS = 50

    # Set the minimum pause between calls to the database
    PAUSE = 5 # seconds

    # URL to the DB on the CouchDB server
    URL = "http://localhost:5984"

    # One entry for each design document 
    # in each database
    VIEWS = {
        'my_db': {
            'design_doc': [
                'view_name',
                # ...
            ]
        }
    }

    def start(self):
        """Run the polling loop in a new thread."""
        Thread(target=self._run).start()

    def _view_urls(self, db_name):
        """Return the ``limit=0`` URL of every view configured for *db_name*.

        The list is empty when *db_name* has no entry in ``VIEWS``.
        """
        urls = []
        for design, views in self.VIEWS.get(db_name, {}).items():
            for view in views:
                urls.append('%s/%s/_design/%s/_view/%s?limit=0' % (
                    self.URL, db_name, design, view
                ))
        return urls

    def _run(self):
        """Loop, checking for enough ``changed_docs`` to trigger a
          request to couchdb to re-index.

        Any exception, including a failed HTTP request, clears
        ``flags['is_running']`` so the other loops stop too, and is then
        re-raised.
        """

        while flags['is_running']:
            try:
                # Iterate over a copy: entries are removed inside the loop.
                for db_name, number_of_docs in list(changed_docs.items()):
                    if number_of_docs < self.MIN_NUM_OF_CHANGED_DOCS:
                        continue
                    # Reset the value
                    changed_docs.pop(db_name, None)
                    # If there are views in the database, get them
                    if db_name not in self.VIEWS:
                        continue
                    logging.info('regenerating %s', db_name)
                    for url in self._view_urls(db_name):
                        # Only the request itself matters; close the
                        # response so the connection is not leaked.
                        urllib2.urlopen(url).close()
                time.sleep(self.PAUSE)
            except Exception:
                flags['is_running'] = False
                raise

class NotificationConsumer(object):
    """Receives the update notification from CouchDB.

    Reads one notification per line from stdin and counts the pending
    changes per database in the module-level ``changed_docs`` dict.
    """

    # Extracts the database name from a notification line such as
    # {"type":"updated","db":"DB_NAME"}. Everything up to the closing quote
    # is captured because database names may contain characters that \w
    # does not match (for example "-").
    DB_NAME_EXPRESSION = re.compile(r'"db"\s*:\s*"([^"]+)"')

    def _run(self):
        """Consume update notifications from stdin.

        Stops, clearing ``flags['is_running']``, when stdin reaches EOF or
        can no longer be read.
        """
        import errno  # local import: only this method needs it

        while flags['is_running']:
            try:
                data = sys.stdin.readline()
            except (EnvironmentError, ValueError) as err:
                # A read interrupted by a signal is worth retrying; anything
                # else (closed stdin, I/O error) would fail again at once,
                # so stop rather than spin.
                if getattr(err, 'errno', None) == errno.EINTR:
                    continue
                logging.exception('cannot read update notifications')
                flags['is_running'] = False
                break

            if not data:  # empty string means EOF on stdin: exit
                flags['is_running'] = False
                break

            result = self.DB_NAME_EXPRESSION.search(data)
            if result:
                db_name = result.group(1)
                # Add one pending changed document, starting from 0 for a
                # database not seen before. One get-and-store cannot raise
                # KeyError if the entry is removed in the meantime.
                changed_docs[db_name] = changed_docs.get(db_name, 0) + 1

    def start(self):
        """Run the consumer loop in a new thread and return that thread."""
        t = Thread(target=self._run)
        t.start()
        return t

def main():
    """Start the updater and consumer threads and wait until they stop.

    The main thread waits on the consumer thread for as long as
    ``flags['is_running']`` is set. A ``KeyboardInterrupt`` clears the flag
    so the worker loops wind down.
    """
    logging.info('update_notification handler (re)starting')
    consumer = NotificationConsumer()
    updater = ViewUpdater()
    updater.start()
    t = consumer.start()
    try:
        while flags['is_running']:
            # Wake up every 10 seconds to re-check the run flag.
            t.join(10)
    except KeyboardInterrupt:
        flags['is_running'] = False

# Run the handler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()