[Groonga-commit] groonga/grnxx at 0b09d5b [master] Use a history to make grnxx::map::Patricia::add() fast.

Back to archive index

susumu.yata null+****@clear*****
Mon Jun 24 16:33:41 JST 2013


susumu.yata	2013-06-24 16:33:41 +0900 (Mon, 24 Jun 2013)

  New Revision: 0b09d5b8a6673564aa22b83955337939d392814f
  https://github.com/groonga/grnxx/commit/0b09d5b8a6673564aa22b83955337939d392814f

  Message:
    Use a history to make grnxx::map::Patricia::add() fast.

  Modified files:
    lib/grnxx/map/patricia.cpp

  Modified: lib/grnxx/map/patricia.cpp (+132 -122)
===================================================================
--- lib/grnxx/map/patricia.cpp    2013-06-21 19:28:35 +0900 (70c10a3)
+++ lib/grnxx/map/patricia.cpp    2013-06-24 16:33:41 +0900 (3173367)
@@ -675,44 +675,58 @@ bool Patricia<Bytes>::find(KeyArg key, int64_t *key_id) {
 }
 
 bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) {
-  const uint64_t bit_size = key.size() * 8;
   uint64_t node_id = ROOT_NODE_ID;
-  Node * node;
-  for (node = nodes_->get_pointer(node_id);
-       node && (node->status() != NODE_LEAF);
-       node = nodes_->get_pointer(node_id)) {
-    switch (node->status()) {
-      case NODE_DEAD: {
-        // The patricia is empty.
-        int64_t next_key_id;
-        if (!keys_->add(key, &next_key_id)) {
-          // Error.
-          return false;
-        }
-        *node = Node::leaf_node(next_key_id);
-        if (key_id) {
-          *key_id = next_key_id;
-        }
-        return true;
-      }
-      case NODE_BRANCH: {
-        node_id = node->offset();
-        if (node->bit_pos() < bit_size) {
-          node_id += get_ith_bit(key, node->bit_pos());
-        }
+  Node *node = nodes_->get_pointer(node_id);
+  if (!node) {
+    // Error.
+    return false;
+  }
+  if (node->status() == NODE_DEAD) {
+    // The patricia is empty.
+    int64_t next_key_id;
+    if (!keys_->add(key, &next_key_id)) {
+      // Error.
+      return false;
+    }
+    *node = Node::leaf_node(next_key_id);
+    if (key_id) {
+      *key_id = next_key_id;
+    }
+    return true;
+  }
+  const uint64_t bit_size = key.size() * 8;
+  Node *history[8];
+  int depth = 0;
+  history[0] = node;
+  while (node->status() != NODE_LEAF) {
+    if (node->status() == NODE_BRANCH) {
+      if (node->bit_pos() >= bit_size) {
         break;
       }
-      case NODE_TERMINAL: {
-        node_id = node->offset() + (node->bit_size() < bit_size);
+      node_id = node->offset() + get_ith_bit(key, node->bit_pos());
+    } else {
+      if (node->bit_size() >= bit_size) {
         break;
       }
+      node_id = node->offset() + 1;
+    }
+    node = nodes_->get_pointer(node_id);
+    if (!node) {
+      // Error.
+      return false;
     }
+    history[++depth % 8] = node;
   }
-  if (!node) {
-    // Error.
-    return false;
+  // Find a leaf node.
+  while (node->status() != NODE_LEAF) {
+    node_id = node->offset();
+    node = nodes_->get_pointer(node_id);
+    if (!node) {
+      // Error.
+      return false;
+    }
   }
-  // "node" points to a leaf node.
+  // Count the number of the common prefix bits.
   Key stored_key;
   if (!keys_->get_key(node->key_id(), &stored_key)) {
     // Error.
@@ -726,11 +740,7 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) {
       break;
     }
   }
-  if (count < min_size) {
-    const uint8_t diff = key[count] ^ stored_key[count];
-    count *= 8;
-    count += 7 - bit_scan_reverse(diff);
-  } else {
+  if (count == min_size) {
     if (key.size() == stored_key.size()) {
       // Found.
       if (key_id) {
@@ -738,84 +748,96 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) {
       }
       return false;
     }
-    count *= 8;
+    node = history[depth % 8];
+    Node * const next_nodes = nodes_->get_pointer(header_->next_node_id);
+    if (!next_nodes) {
+      return false;
+    }
+    int64_t next_key_id;
+    if (!keys_->add(key, &next_key_id)) {
+      // Error.
+      return false;
+    }
+    if (count == key.size()) {
+      // "key" is a prefix of "stored_key".
+      next_nodes[0] = Node::leaf_node(next_key_id);
+      next_nodes[1] = *node;
+      *node = Node::terminal_node(count * 8, header_->next_node_id);
+    } else {
+      // "stored_key" is a prefix of "key".
+      next_nodes[0] = *node;
+      next_nodes[1] = Node::leaf_node(next_key_id);
+      *node = Node::terminal_node(count * 8, header_->next_node_id);
+    }
+    header_->next_node_id += 2;
+    if (key_id) {
+      *key_id = next_key_id;
+    }
+    return true;
   }
-  Node * const next_nodes = nodes_->get_pointer(header_->next_node_id);
+  count = (count * 8) + 7 - bit_scan_reverse(key[count] ^ stored_key[count]);
+  // Find the branching point in "history".
+  int min_depth = (depth < 8) ? 0 : depth - 7;
+  while (--depth >= min_depth) {
+    node = history[depth % 8];
+    if (node->status() == NODE_BRANCH) {
+      if (node->bit_pos() < count) {
+        break;
+      }
+    } else if (node->bit_size() < count) {
+      break;
+    }
+  }
+  if (depth >= min_depth) {
+    // The branching point exists in "history".
+    node = history[(depth + 1) % 8];
+    Node * const next_nodes = nodes_->get_pointer(header_->next_node_id);
+    if (!next_nodes) {
+      return false;
+    }
+    int64_t next_key_id;
+    if (!keys_->add(key, &next_key_id)) {
+      // Error.
+      return false;
+    }
+    if (get_ith_bit(key, count)) {
+      next_nodes[0] = *node;
+      next_nodes[1] = Node::leaf_node(next_key_id);
+    } else {
+      next_nodes[0] = Node::leaf_node(next_key_id);
+      next_nodes[1] = *node;
+    }
+    *node = Node::branch_node(count, header_->next_node_id);
+    header_->next_node_id += 2;
+    if (key_id) {
+      *key_id = next_key_id;
+    }
+    return true;
+  }
+  // Find the branching point with the naive method.
   node_id = ROOT_NODE_ID;
-  for (node = nodes_->get_pointer(node_id);
-       node && (node->status() != NODE_LEAF);
-       node = nodes_->get_pointer(node_id)) {
-    switch (node->status()) {
-      case NODE_BRANCH: {
-        if (count <= node->bit_pos()) {
-          int64_t next_key_id;
-          if (!keys_->add(key, &next_key_id)) {
-            // Error.
-            return false;
-          }
-          if (count == bit_size) {
-            // Create a terminal node.
-            next_nodes[0] = Node::leaf_node(next_key_id);
-            next_nodes[1] = *node;
-            *node = Node::terminal_node(count, header_->next_node_id);
-          } else {
-            // Create a branch node.
-            if (get_ith_bit(key, count)) {
-              next_nodes[0] = *node;
-              next_nodes[1] = Node::leaf_node(next_key_id);
-            } else {
-              next_nodes[0] = Node::leaf_node(next_key_id);
-              next_nodes[1] = *node;
-            }
-            *node = Node::branch_node(count, header_->next_node_id);
-          }
-          header_->next_node_id += 2;
-          if (key_id) {
-            *key_id = next_key_id;
-          }
-          return true;
-        }
-        node_id = node->offset();
-        if (node->bit_pos() < count) {
-          node_id += get_ith_bit(key, node->bit_pos());
-        }
+  for ( ; ; ) {
+    node = nodes_->get_pointer(node_id);
+    if (!node) {
+      // Error.
+      return false;
+    }
+    if (node->status() == NODE_LEAF) {
+      break;
+    } else if (node->status() == NODE_BRANCH) {
+      if (node->bit_pos() >= count) {
         break;
       }
-      case NODE_TERMINAL: {
-        if (count < node->bit_size()) {
-          int64_t next_key_id;
-          if (!keys_->add(key, &next_key_id)) {
-            // Error.
-            return false;
-          }
-          if (count == bit_size) {
-            // Create a terminal node.
-            next_nodes[0] = Node::leaf_node(next_key_id);
-            next_nodes[1] = *node;
-            *node = Node::terminal_node(count, header_->next_node_id);
-          } else {
-            // Create a branch node.
-            if (get_ith_bit(key, count)) {
-              next_nodes[0] = *node;
-              next_nodes[1] = Node::leaf_node(next_key_id);
-            } else {
-              next_nodes[0] = Node::leaf_node(next_key_id);
-              next_nodes[1] = *node;
-            }
-            *node = Node::branch_node(count, header_->next_node_id);
-          }
-          header_->next_node_id += 2;
-          if (key_id) {
-            *key_id = next_key_id;
-          }
-          return true;
-        }
-        node_id = node->offset() + (node->bit_size() < bit_size);
+      node_id = node->offset() + get_ith_bit(key, node->bit_pos());
+    } else {
+      if (node->bit_size() > count) {
         break;
       }
+      node_id = node->offset() + 1;
     }
   }
-  if (!node) {
+  Node * const next_nodes = nodes_->get_pointer(header_->next_node_id);
+  if (!next_nodes) {
     // Error.
     return false;
   }
@@ -824,27 +846,15 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) {
     // Error.
     return false;
   }
-  if (count == bit_size) {
-    // Create a terminal node.
-    next_nodes[0] = Node::leaf_node(next_key_id);
-    next_nodes[1] = *node;
-    *node = Node::terminal_node(count, header_->next_node_id);
-  } else if (count == (stored_key.size() * 8)) {
-    // Create a terminal node.
+  // Create a branch node.
+  if (get_ith_bit(key, count)) {
     next_nodes[0] = *node;
     next_nodes[1] = Node::leaf_node(next_key_id);
-    *node = Node::terminal_node(count, header_->next_node_id);
   } else {
-    // Create a branch node.
-    if (get_ith_bit(key, count)) {
-      next_nodes[0] = *node;
-      next_nodes[1] = Node::leaf_node(next_key_id);
-    } else {
-      next_nodes[0] = Node::leaf_node(next_key_id);
-      next_nodes[1] = *node;
-    }
-    *node = Node::branch_node(count, header_->next_node_id);
+    next_nodes[0] = Node::leaf_node(next_key_id);
+    next_nodes[1] = *node;
   }
+  *node = Node::branch_node(count, header_->next_node_id);
   header_->next_node_id += 2;
   if (key_id) {
     *key_id = next_key_id;
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index