Binary Tree (add-only, unbalanced)

From liblfds.org
Revision as of 07:10, 31 May 2016 by Admin (talk | contribs) (→‎Benchmark Results and Analysis)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Source Files

└───liblfds710
    ├───inc
    │   └───liblfds710
    │           lfds710_btree_addonly_unbalanced.h
    └───src
        └───lfds710_btree_addonly_unbalanced
                lfds710_btree_addonly_unbalanced_cleanup.c
                lfds710_btree_addonly_unbalanced_get.c
                lfds710_btree_addonly_unbalanced_init.c
                lfds710_btree_addonly_unbalanced_insert.c
                lfds710_btree_addonly_unbalanced_internal.h
                lfds710_btree_addonly_unbalanced_query.c

Enums

enum lfds710_btree_au_absolute_position;
enum lfds710_btree_au_existing_key;
enum lfds710_btree_au_insert_result;
enum lfds710_btree_au_query;
enum lfds710_btree_au_relative_position;

Opaque Structures

struct lfds710_btree_au_element;
struct lfds710_btree_au_state;

Macros

#define LFDS710_BTREE_AU_GET_KEY_FROM_ELEMENT( btree_au_element )
#define LFDS710_BTREE_AU_SET_KEY_IN_ELEMENT( btree_au_element, new_key )

#define LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( btree_au_element )
#define LFDS710_BTREE_AU_SET_VALUE_IN_ELEMENT( btree_au_element, new_value )

#define LFDS710_BTREE_AU_GET_USER_STATE_FROM_STATE( btree_au_state )

Prototypes

void lfds710_btree_au_init_valid_on_current_logical_core( struct lfds710_btree_au_state *baus,
                                                          int (*key_compare_function)(void const *new_key, void const *existing_key),
                                                          enum lfds710_btree_au_existing_key existing_key,
                                                          void *user_state );

void lfds710_btree_au_cleanup( struct lfds710_btree_au_state *baus,
                               void (*element_cleanup_callback)(struct lfds710_btree_au_state *baus, struct lfds710_btree_au_element *baue) );

enum lfds710_btree_au_insert_result lfds710_btree_au_insert( struct lfds710_btree_au_state *baus,
                                                             struct lfds710_btree_au_element *baue,
                                                             struct lfds710_btree_au_element **existing_baue );

int lfds710_btree_au_get_by_key( struct lfds710_btree_au_state *baus, 
                                 int (*key_compare_function)(void const *new_key, void const *existing_key),
                                 void *key,
                                 struct lfds710_btree_au_element **baue );

int lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position( struct lfds710_btree_au_state *baus,
                                                                             struct lfds710_btree_au_element **baue,
                                                                             enum lfds710_btree_au_absolute_position absolute_position,
                                                                             enum lfds710_btree_au_relative_position relative_position );

int lfds710_btree_au_get_by_absolute_position( struct lfds710_btree_au_state *baus,
                                               struct lfds710_btree_au_element **baue,
                                               enum lfds710_btree_au_absolute_position absolute_position );

int lfds710_btree_au_get_by_relative_position( struct lfds710_btree_au_element **baue,
                                               enum lfds710_btree_au_relative_position relative_position );
 
void lfds710_btree_au_query( struct lfds710_btree_au_state *baus,
                             enum lfds710_btree_au_query query_type,
                             void *query_input,
                             void *query_output );

Overview

This data structure implements an add-only, unbalanced btree. It supports any number of concurrent users, and internally implements exponential backoff to help deal with high load and so improve scalability (although being a btree it naturally acts to distribute memory access behaviour which helps scalability).

The implementation performs no allocations. The user is responsible for all allocations (and deallocations), where these allocations are passed into the API functions, which then use them. As such, allocations can be on the stack, on the heap, or as can sometimes be the the case in embedded systems, allocated with fixed addresses at compile time from a fixed global store. Allocations can also be shared memory, but in this case, the virtual addresses used must be the same in all processes.

General usage is that the user calls lfds710_btree_au_init_valid_on_current_logical_core to initialize a struct lfds710_btree_au_state, and then calls lfds710_btree_au_link to add elements. A btree element provides the ability to store a key (used to place elements in the btree) and a value, both of which are of type void * and can either point to data, or can be used directly, as key comparason is performed by a user-provided callback and the value is not touched by the btree code.

(See the section below, on lock-free specific behaviour, for an explanation of the unusual init function name.)

The key and value are get and set in elements by macros, such as LFDS710_BTREE_AU_SET_VALUE_IN_ELEMENT. The key can only be set in elements before they are added to a tree. The value can be set at any time, in elements both inside and outside of the tree.

The state and element structures are both public, present in the lfds710_btree_au.h header file, so that users can embed them in their own structures (and where necessary pass them to sizeof). Expected use is that user structures which are to enter btrees contain within themselves a struct lfds710_btree_au_element, where the user sets the key as necessary for the btree and the value to point to the user structure entering the btree. This approach permits zero run-time allocation of store and also ensures the btree element is normally in the same memory page as the user data it refers to.

When initializing the btree, the caller specifies the behaviour of the btree when the attempt is mde to add a new element which has a key already present in the btree. The btree can be configured to either fail to add the element, or it can be configured to overwrite the value in the existing element with the value of the new element.

Finally, when all is said and done, use lfds710_btree_au_cleanup to cleanup the tree. Once this function has returned, the user is then safe to deallocate all allocations.

Lock-free Specific Behaviour

The state initialization function, lfds710_btree_au_init_valid_on_current_logical_core, as the same suggests, initializes the state structure but that initialization is only valid on the current logical core. For the initialization to be valid on other logical cores (i.e. other threads where they are running on other logical cores) those other threads need to call the long-windedly named macro LFDS710_MISC_MAKE_VALID_ON_CURRENT_LOGICAL_CORE_INITS_COMPLETED_BEFORE_NOW_ON_ANY_OTHER_LOGICAL_CORE, which will do that which its name suggests.

Once a btree element structure has been linked to the btree, it cannot be deallocated (free, or stack allocation lifetimes ending due to say a thread ending, etc) until lfds710_btree_au_cleanup has returned. Elements cannot be removed from the tree, or moved within the tree. Their value however can be changed at any time.

The SET macro for the key in an element can only be correctly used on elements which are outside of a btree. The SET macro for the value in an element can be used at any time, on any element. By correctly is it meant to say that the GET macros will actually read the data written by the SET macros, and not some other data. I don't have to tell you how much chaos will ensure if different logical cores are reading different keys for the same element...

If shared memory is used for allocations, the virtual addresses must be the same across different processes.

Benchmark Results and Analysis

btree (add-only, unbalanced)
ARM32 MIPS32 x64
Raspberry Pi 2 Model B Ci20 AWS dedicated VM Core i5

The benchmark consists of a btree state which has 1024 elements per thread, where each element has a random integer key, where the benchmark loops, performing one read and then one write operation, where the key used for the read and the key used for the write are randomly selected from the pool of keys. The pthread rwlock uses a read lock for reading, and a write lock for writing. The locking versions of the btree have one lock for the entire btree, and so only one thread accesses the btree at a time (except in the case of rwlocks, where naturally any number of readers can access the tree at any time).

This beanchmark is in fact flawed. As the number of elements in the btree increases the mean number of steps through the btree to find an element increases. This means as the core count rises, the work being done by a single operation is increasing.

The btree data structure benefits massively from a lock-free implementation. In locking implementations there is typically one lock per tree, so the entire tree is single-threaded. With rwlocks, there can be any number of readers, but only a single writer, and the rwlock itself is a point of contention. With lock-free, any number of thread can execute concurrently, for read and for write, and the btree, by its distributed nature, inherently lacks particular memory locations which experience contention. End result is excellent scalability, as demonstrated in the 16 core AWS dedicated VM gnuplot.

Version 7.0.0 was inefficient in its use of cache lines. This actually led on the Raspberry Pi 2 Model B to the btree performing at about one tenth of the speed of the locking implementations! fixing this implementation error returned the btree to its usual place, about three times faster (on up to four cores).

The autotuning exponential backoff hardly makes any difference to btree performance. Where the elements are spread out in a probabalistically balanced tree (the keys are random), threads are issuing highly dispersed memory accesses and rarely collide.

White Paper

There is no white paper for this data structure; it is native to liblfds.

License

Standard liblfds license - there is no license. You are free to use this code in any way. Go forth and create wealth!

If however for legal reasons a licence is required, the license of your choice will be granted, and license for convenience is hereby granted up front for a range of popular licenses : the MIT license, the BSD license, the Apache license, the GPL and LPGL (all versions thereof) and the Creative Commons licenses (all of them). Additionally, this library (which is to say, the source code, build files, documentation, everything) is placed in the public domain.

Example

#include <stdio.h>
#include <stdlib.h>
#include "liblfds710.h"

struct test_data
{
  int long long unsigned
    unique_id;

  char
    payload[64];

  struct lfds710_btree_au_element
    buae;
};

int key_compare_function( void const *new_key, void const *existing_key )
{
  int
    cr = 0;

  int long long unsigned
    *new_key = (int long long unsigned *) new_key,
    *existing_key  = (int long long unsigned *) existing_key;

  if( *new_key > *existing_key )
    cr = 1;

  if( *new_key < *existing_key )
    cr = -1;

  return( cr );
}

int main()
{
  enum lfds710_btree_au_insert_result
    bauir;

  int long long unsigned
    loop;

  struct lfds710_btree_au_element
    *buae = NULL;

  struct lfds710_btree_au_state
    baus;
 
  struct test_data
    *td,
    *temp_td;
 
  lfds710_btree_au_init_valid_on_current_logical_core( &baus, key_compare_function, LFDS710_BTREE_AU_EXISTING_KEY_FAIL, NULL );

  // TRD : allocate ten test elements, populate with dummy data and link to tree
  td = malloc( sizeof(struct test_data) * 10 );

  for( loop = 0 ; loop < 10 ; loop++ )
  {
    td[loop].unique_id = loop;
    sprintf( td[loop].payload, "the unique id is %llu", loop );

    LFDS710_BTREE_AU_SET_KEY_IN_ELEMENT( td[loop].baue, &td[loop].unique_id );
    LFDS710_BTREE_AU_SET_VALUE_IN_ELEMENT( td[loop].baue, &td[loop] );

    bauir = lfds710_btree_au_insert( baus, &td[loop].baue, NULL );

    if( bauir != LFDS710_BTREE_AU_INSERT_RESULT_SUCCESS )
      printf( "Well, bugger!  so much for quality control\n" );
  }

  // TRD : now in-order walk the tree
  while( lfds710_btree_au_get_by_absolute_position_and_then_by_relative_position(baus, &baue, LFDS710_BTREE_AU_SMALLEST_IN_TREE, LFDS710_BTREE_AU_INORDER_WALK_FROM_SMALLEST_TO_LARGEST) )
  {
    temp_td = LFDS710_BTREE_AU_GET_VALUE_FROM_ELEMENT( *baue );
    printf( "element %llu has value \"%s\"\n", temp_td->unique_id, temp_id->payload );
  }

  lfds710_btree_au_cleanup( &baus, NULL );

  free( td );

  return( EXIT_SUCCESS );
}

See Also