Binary Search and A Sample Implementation for C Standard Library bsearch Function

Binary search is an algorithm that finds the position of a search target in a sorted array. It has an worst case run time of logN (for 1 million items, 20 probes can do), which makes it pretty efficient in searching. This post briefly describes binary search and provides two implementations with the same interface as C standard library bsearch() function.

Binary search is a divide and conquer search algorithm. It consists of zero or more probes. At each probe, it first compare the search target (key) with the middle element, if the key is smaller than the middle element, it reduces the search space to the lower half; if it’s bigger, it reduces the search space to upper half; if equal, the search is ended. When the search space is reduced to 0 size, the search fails.

C Implementations

Binary search can be implemented in recursion or iteration. Below are two implementations in recursion and iteration respectively. I used the same interface as C standard library,

void* bsearch(const void* key, const void* base, size_t n, size_t size, int (*cmp)(const void* keyval, const void* datum));

where key points to the search target; base is the pointer to first item of the search space, which has n items, each item is size number of bytes; the last parameter is a function that compares the two items. It must return negative value if first item is less than second, zero if equal and positive if greater.

Below is the recursion version,

#include <stdio.h>

#include <stdlib.h>

 

int searchCnt;

 

void* mybsearch(const void* key, const void* base, size_t n, size_t size, int (*cmp)(const void* keyval, const void* datum)) {

    void *mp;

    int cr;

    if (n  <= 0) {

        return NULL;

    }

    mp = (void*)(base + n/2*size);

    cr = cmp(key, mp);

    printf("%d:%d:%dn", *(int *)mp,*(int*)key, cr);

    ++searchCnt;

    if (cr == 0) {

        return mp;

    } else {

        if (cr > 0) {

            if (n <= 2) {

                return NULL;

            }

            return mybsearch(key, mp + size, (n-1)/2, size, cmp);

        } else if (cr < 0) {

            if (n <= 1) {

                return NULL;

            }

            return mybsearch(key, base, n/2, size, cmp);

        }

    }

}

 

int cmp(const void *a, const void *b) {

    int *pa, *pb;

    pa = (int *)a;

    pb = (int *)b;

    if ((*pa) > (*pb)) {

        return 1;

    } else if ((*pa) < (*pb)) {

        return -1;

    } else {

        return 0;

    }

}

 

int main(int argc, char **argv) {

    int* testArray;

    int i;

    int start = atoi(argv[1]);

    int n = atoi(argv[2]);

    int target = atoi(argv[3]);

    int* rv;

    testArray = (int*) malloc(n*sizeof(int));

    printf("search %d in [%d, %d]n", target, start, start + n - 1);

    for (i = 0; i < n; ++i) {

        testArray[i] = start + i;

    }

    searchCnt = 0;

    rv = (int *)mybsearch(&target, testArray, n, sizeof(int), &cmp);

    if (rv != NULL) {

        printf("search %d: found %d in %d probesn", target, *rv, searchCnt);

    } else {

        printf("search %d not found in %d probesn", target, searchCnt);

    }

}

Below is the iteration version,

#include <stdio.h>

#include <stdlib.h>

 

int searchCnt;

 

void* mybsearch(const void* key, const void* base, size_t n, size_t size, int (*cmp)(const void* keyval, const void* datum)) {

    void *mp;

    int cr;

 

    if (n  <= 0) {

        return NULL;

    }

    mp = (void*)(base + n/2*size);

    cr = cmp(key, mp);

    ++searchCnt;

    printf("%d:%d:%dn", *(int *)mp,*(int*)key, cr);

    while (cr != 0) {

        ++searchCnt;

        if (cr > 0) {

            if (n <= 2) {

                return NULL;

            }

            base = mp + size;

            n = (n-1)/2;

        } else if (cr < 0) {

            if (n <= 1) {

                return NULL;

            }

            n = n/2;

        }

        mp = (void*) (base + n/2*size);

        cr = cmp(key, mp);

        printf("%d:%d:%dn", *(int *)mp,*(int*)key, cr);

    }

    return mp;

}

 

int cmp(const void *a, const void *b) {

    int *pa, *pb;

    pa = (int *)a;

    pb = (int *)b;

    if ((*pa) > (*pb)) {

        return 1;

    } else if ((*pa) < (*pb)) {

        return -1;

    } else {

        return 0;

    }

}

 

int main(int argc, char **argv) {

    int* testArray;

    int i;

    int start = atoi(argv[1]);

    int n = atoi(argv[2]);

    int target = atoi(argv[3]);

    int* rv;

    testArray = (int*) malloc(n*sizeof(int));

    printf("search %d in [%d, %d]n", target, start, start + n - 1);

    for (i = 0; i < n; ++i) {

        testArray[i] = start + i;

    }

    rv = (int *)mybsearch(&target, testArray, n, sizeof(int), &cmp);

    if (rv != NULL) {

        printf("search %d: found %d in %d probesn", target, *rv, searchCnt);

    } else {

        printf("search %d not found in %d probesn", target, searchCnt);

    }

}

Sample Run

Both programs accepts three input parameters, the starting search value, the search space size and the search target. For example, if ./bsearch 100 12345 120, it means the sorted array contains value [100, 100+12345-1], and we’re trying to search for 120 from the sorted array.

A execution of either programs with command (./bsearch 100 12345 120) produces the following output,

6272:120:-1
3186:120:-1
1643:120:-1
871:120:-1
485:120:-1
292:120:-1
196:120:-1
148:120:-1
124:120:-1
112:120:1
118:120:1
121:120:-1
120:120:0
search 120: found 120 in 13 probes

Some Practical Notes

Although binary search has faster run time compared with linear search, it might not out-perform linear search under all situations. This is because binary search access the data randomly, thus perform poorly in terms of memory access.

A common practice is to switch to linear search when the search space size reduces to 8, 16 or even more depends on your computers. Another approach people use to speed up binary search is to store the first few accessed values (N/2, N/4, 3N/4, etc. ) in a separate array and access them sequentially.

The practical notes are summarized from wikipedia.

References:
1. Wikipedia, Binary search algorithm: http://en.wikipedia.org/wiki/Binary_search
2. C Standard Library Reference: http://www.utas.edu.au/infosys/info/documentation/C/CStdLib.html

Depth-First Graph Traversal using Stack — a C Implementation

There’re two common ways of visiting all nodes of a graph, depth-first search and breadth-first search. These two graph traversal approaches relate to two popular data structures, stack and queue respectively. (One can also do it in a recursive manner. It’s more intuitive but less efficient.)

This post covers depth-first search with a stack data structure. The algorithm expressed in pseudo code is as below,

DFS (Graph, root):
   create a stack S
   push root node to S
   while S is not empty:
       pop an itme v from S
       mark the item v as visited
       for each node w that is directed from v:
           if w is not visited:
               push w to S

The first step of the implementation is to implement a stack. The stack implemented below has the basic functions like push, pop, ifEmpty, etc.

1. Stack Implementation

The header file stack.h, defines the data structures used in Stack and Graph.

#include <stdio.h>
#include <stdlib.h>
typedef struct Point {
   int h;
   int w;
} Point;
typedef struct GraphNode {
   struct Point pvalue;
   struct GraphNode* left;
   struct GraphNode* right;
} GraphNode;
typedef struct StackElement {
   struct GraphNode value;
   struct StackElement *up;
   struct StackElement *down;
} StackElement;
typedef struct Stack {
   struct StackElement* topP;
   struct StackElement* bottomP;
} Stack;
void initStack(struct Stack *s);
void push(struct Stack *s, struct GraphNode _value);
void pop(struct Stack *s);
struct GraphNode top(struct Stack *s);
int ifEmpty(struct Stack *s);

The actual implementation stack.c

/**
a C stack implementation using linked list
*/
#include "stack.h"
/*initialize the stack*/
void initStack(struct Stack *s) {
   s->topP = NULL;
   s->bottomP = NULL;
}
/*add an element to the top of the stack*/
void push(struct Stack *s, struct GraphNode _value) {
   //allocate a new StackElement for _value
   StackElement *newElement;
   newElement = (StackElement*) malloc(sizeof(StackElement));
   newElement->value = _value;
   newElement->up = NULL;
   newElement->down = NULL;
   if (s->topP == NULL) {
        //first element
       s->topP = newElement;
        s->bottomP = newElement;
   } else {
        //push it to the top
        newElement->down = s->topP;
        s->topP = newElement;
   }
}
/*delete the top element from the stack*/
void pop(struct Stack *s) {
   StackElement *element;
   if (s->topP == NULL) {
        //empty stack
        return;
   } else {
        element = s->topP;
        s->topP = element->down;
        if (s->topP != NULL)
             s->topP->up = NULL;
        free(element);
   }
}
/*get the top value of the stack, but don't delete it*/
struct GraphNode top(struct Stack *s) {
   return s->topP->value;
}
/*check if the stack is empty*/
int ifEmpty(struct Stack *s) {
   return (s->topP == NULL ? 1:0);
}

Note that the top() function above returns the top element of the stack and the pop() function deletes the top element from the stack. Some stack implementations has the pop() function does both return and delete. But I personally think separating them is more convenient as sometimes you just want to take a look at the stack but not delete anything from it.

The Stack above will take input element of GraphNode type, but you’re free to modify it to fit your needs.

2. The Depth-First Graph Traversal

For simplicity, an example of visiting all nodes of a binary tree is given below. The tree structure is included in the comment of the source code file dfs.c,

#include <stdio.h>
#include <stdlib.h>
#include "stack.h"
/* a simple graph (binary tree)
         (0,    3)   <– root node
          /     
      (1, 2)    (1,4)
       /       /   
   (2,1) (2,2) (2,3) (2,5)
*/
typedef struct Graph {
   struct GraphNode* root;
} Graph;
struct GraphNode initNode(int _h, int _w) {
   struct GraphNode node;
   node.pvalue.h = _h;
   node.pvalue.w = _w;
   node.left = NULL;
   node.right = NULL;
   return node;
}
int main() {
   struct Graph g;
   struct GraphNode currentNode;
   /*read in the graph from top to bottom*/
   g.root = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root = initNode(0, 3);
   //level 2 nodes
   g.root->left = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root->left = initNode(1, 2);
   g.root->right = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root->right = initNode(1, 4);
   //level 3 nodes
   (*g.root->left).left = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->left).left = initNode(2, 1);
   (*g.root->left).right = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->left).right = initNode(2, 2);
   (*g.root->right).left = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->right).left = initNode(2, 3);
   (*g.root->right).right = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->right).right = initNode(2, 5);
   /*DFS traversal*/
   Stack s;
   initStack(&s);
   push(&s, *g.root);
   while (!ifEmpty(&s)) {
       currentNode = top(&s);
        pop(&s);
        //mark as visited, here we simply print the graph node value out
       printf("(%d, %d)n", currentNode.pvalue.h, currentNode.pvalue.w);
        //at most two children, left and right. As the binary tree is acyclic,
        // both nodes should not be visited yet, for cyclic graphs, 
        //one need additional methods to check if the nodes is visited or not before push
       if (currentNode.left != NULL) {
           push(&s, *currentNode.left);
       } 
       if (currentNode.right != NULL) {
           push(&s, *currentNode.right);
        } 
   }
   return 0;
}

3. Output

To compile the code in Linux, type the following command,

gcc stack.c dfs.c -o dfs

Then run the binary dfs,

        ./dfs

The output we’ll get is,

(0, 3)

(1, 4)

(2, 5)

(2, 3)

(1, 2)

(2, 2)

(2, 1)

which corresponds to the expected nodes visiting order in depth-first search. As we push the left node first, the traversal is from right to left.

Breadth-First Graph Traversal using Queue – a C Implementation

There’re two common ways of visiting all nodes of a graph, depth-first search and breadth-first search. These two graph traversal approaches relate to two popular data structures, stack and queue respectively. (One can also do it in a recursive manner. It’s more intuitive but less efficient.)

This post covers breadth-first search with a queue data structure. The algorithm expressed in pseudo code is as below,

BFS (Graph, root):
    create a queue Q
    enqueue root node to Q
    while Q is not empty:
        dequeue an itme v from Q
        mark the item v as visited
        for each node w that is directed from v:
            if w is not visited:
                enqueue w to Q

The first step of the implementation is to implement a queue. The queue implemented below has the basic functions like enqueue, dequeue, ifEmpty, etc.

1. A Queue Implementation

The header file queue.h, defines the data structures used in Queue and Graph.

#include <stdio.h>
#include <stdlib.h>
typedef struct Point {
   int h;
   int w;
} Point;
typedef struct GraphNode {
   struct Point pvalue;
   struct GraphNode* left;
   struct GraphNode* right;
} GraphNode;
typedef struct QueueElement {
   struct GraphNode value;
   struct QueueElement *next;
} QueueElement;
typedef struct Queue {
   struct QueueElement* head;
   struct QueueElement* tail;
} Queue;
void initQueue(struct Queue *q);
void enqueue(struct Queue *q, struct GraphNode _value);
void dequeue(struct Queue *q);
struct GraphNode front(struct Queue *q);
int ifEmpty(struct Queue *q);

The source file queue.c,

/**
a C queue implementation using linked list
*/
#include "queue.h"
/*initialize the queue*/
void initQueue(struct Queue *q) {
   q->head = NULL;
   q->tail = NULL;
}
/*insert an element to the end of the queue*/
void enqueue(struct Queue *q, struct GraphNode _value) {
   //allocate a new QeueuElement for _value
   QueueElement *newElement;
   newElement = (QueueElement*) malloc(sizeof(QueueElement));
   newElement->value = _value;
   newElement->next = NULL;
   if (q->head == NULL) {
        //first element
       q->head = newElement;
       q->tail = newElement;
   } else {
        //put it to the tail
        q->tail->next = newElement;
        q->tail = newElement;
   }
}
/*delete the first element from the queue*/
void dequeue(struct Queue *q) {
   QueueElement *element;
   if (q->head == NULL) {
        //empty queue
        return;
   } else {
        element = q->head;
        q->head = q->head->next;
        free(element);
   }
}
/*get the front value of the queue, but don't delete it*/
struct GraphNode front(struct Queue *q) {
   return q->head->value;
}
/*check if the queue is empty*/
int ifEmpty(struct Queue *q) {
   return (q->head == NULL ? 1:0);
}

Note that the front() function above returns the first element of the queue and the dequeue() function deletes the first element from the queue. Some queue implementations has the dequeue() function does both return and delete. But I personally think separating them is more convenient as sometimes you just want to take a look at the queue but not delete anything from it.

The Queue above will take input element of GraphNode type, but you’re free to modify it to fit your needs.

2. The Breadth-First Graph Traversal

For simplicity, an example of visiting all nodes of a binary tree is given below. The tree structure is included in the comment of the source code file bfs.c,

#include <stdio.h>
#include <stdlib.h>
#include "queue.h"
/* a simple graph (binary tree)
         (0,    3)   <– root node
          /     
      (1, 2)    (1,4)
       /       /   
   (2,1) (2,2) (2,3) (2,5)
*/
typedef struct Graph {
   struct GraphNode* root;
} Graph;
struct GraphNode initNode(int _h, int _w) {
   struct GraphNode node;
   node.pvalue.h = _h;
   node.pvalue.w = _w;
   node.left = NULL;
   node.right = NULL;
   return node;
}
int main() {
   struct Graph g;
   struct GraphNode currentNode;
   /*read in the graph from top to bottom*/
   g.root = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root = initNode(0, 3);
   //level 2 nodes
   g.root->left = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root->left = initNode(1, 2);
   g.root->right = (GraphNode*) malloc(sizeof(GraphNode));
   *g.root->right = initNode(1, 4);
   //level 3 nodes
   (*g.root->left).left = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->left).left = initNode(2, 1);
   (*g.root->left).right = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->left).right = initNode(2, 2);
   (*g.root->right).left = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->right).left = initNode(2, 3);
   (*g.root->right).right = (GraphNode*) malloc(sizeof(GraphNode));
   *(*g.root->right).right = initNode(2, 5);
   /*BFS traversal*/
   Queue q;
   initQueue(&q);
   enqueue(&q, *g.root);
   while (!ifEmpty(&q)) {
       currentNode = front(&q);
       //dequeue(&q); //you can either put dequeue here or at the end of while
        //mark as visited, here we simply print the graph node value out
       printf("(%d, %d)n", currentNode.pvalue.h, currentNode.pvalue.w);
        //at most two children, left and right. As the binary tree is acyclic,
        // both nodes should not be visited yet, for cyclic graphs, 
        //one need additional methods to check if the nodes is visited or not before enqueue
       if (currentNode.left != NULL) {
           enqueue(&q, *currentNode.left);
       } 
       if (currentNode.right != NULL) {
           enqueue(&q, *currentNode.right);
        } 
        dequeue(&q);
   }
   return 0;
}

3. Output

To compile the code in Linux, type the following command,

gcc queue.c bfs.c -o bfs

Then run the binary bfs,

        ./bfs

The output we’ll get is,

(0, 3)

(1, 2)

(1, 4)

(2, 1)

(2, 2)

(2, 3)

(2, 5)

which corresponds to the expected nodes visiting order in breadth-first search, level 0 node, follows by level 1 nodes, and finally level 2 nodes.