Heap

August 18, 2014
Author:Eric
Source:http://blog.wjin.org/posts/heap.html
Declaration: this work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License. Creative Commons License

Heap Implementation

Heap is similar to priority_queue in C++ STL. Core functions are sift_down and sift_up operations.

// max heap
class Heap
{
private:
    vector<int> m_data; // heap data
    size_t m_size; // heap size

    void sift_up(int i)
    {
        while (i > 1 && m_data[i] > m_data[i / 2]) {
            swap(m_data[i], m_data[i / 2]);
            i /= 2;
        }
    }

    void sift_down(int i)
    {
        int j = 2 * i; // j is the left child

        while (j <= m_size) { // loop until leaf
            // find max child
            if (j + 1 <= m_size && m_data[j + 1] > m_data[j])
                j += 1;

            // swap if possible
            if (m_data[i] < m_data[j]) {
                swap(m_data[i], m_data[j]);
                i = j;
                j *= 2;
            } else {
                break;
            }
        }
    }

    void make_heap()
    {
        for (int i = m_size / 2; i > 0; i--) {
            sift_down(i);
        }
    }

public:
    Heap(vector<int> &v)
    {
        m_size = v.size();

        // do not use m_data[0] to simplify sift_down operation
        // as when parameter i is 0, cannot find left child using 2*i
        m_data.resize(m_size + 1);
        m_data[0] = -1;

        copy(v.begin(), v.end(), m_data.begin() + 1);
        make_heap();
    }

    void push_heap(int val)
    {
        m_data.push_back(val);
        m_size++;
        sift_up (m_size);
    }

    int pop_heap()
    {
        int val = get_top();
        swap(m_data[1], m_data[m_size]);
        m_data.pop_back();
        m_size--;
        sift_down(1);
        return val;
    }

    int get_top()
    {
        return m_data[1];
    }

    int get_size()
    {
        return m_size;
    }
};

Application

Heap Sort

class HeapSort
{
private:
    Heap hp;

public:
    HeapSort(vector<int> &v) : hp(v)
    {
    }

    vector<int> sort()
    {
        vector<int> ret;
        while (hp.get_size()) {
            ret.push_back(hp.pop_heap());
        }
        return ret;
    }
};

Or we can just sort it in place:

class HeapSort
{
private:
    void sift_down(vector<int> &v, int vs, int i)
    {
        int j = 2 * i; // j is the left child

        while (j <= vs) { // loop until leaf
            // find max child
            if (j + 1 <= vs && v[j + 1] > v[j])
                j += 1;

            // swap if possible
            if (v[i] < v[j]) {
                swap(v[i], v[j]);
                i = j;
                j *= 2;
            } else {
                break;
            }
        }
    }

    void make_heap(vector<int> &v, int vs)
    {
        for (int i = vs / 2; i > 0; i--) {
            sift_down(v, vs, i);
        }
    }

public:
    void Sort(vector<int> &v)
    {
        int vs = v.size();

        // do not use v[0] to simplify sift_down operation
        // as when parameter i is 0, cannot find left child using 2*i
        v.insert(v.begin(), -1);

        make_heap(v, vs);

        while (vs > 1) {
            swap(v[1], v[vs]);
            vs--;
            sift_down(v, vs, 1);
        }

        v.erase(v.begin());
    }
};

Find the median of a data flow at anytime

Analysis

Maintain two heaps: small heap and big heap respectively. All elements from small heap are smaller than those in big heap.

And we use max heap to construct small heap(top is the max element) and min heap to construct big heap(top is the minimum element).

When a new element comes, compare it with the top of two heaps(smallTop, bigTop).

For simplicity, let smallSize >= bigSize, so at anytime, the median is either

smallTop, (smallSize > bigSize)

or

(smallTop + bigTop) / 2, (smallSize == bigSize)

Note: there is a O(n) algorithm to find a median of a given array, however, that is different from this question.