Reimplement median of medians
This commit is contained in:
parent
e22eee97da
commit
4894d8c1ed
@ -1,136 +1,104 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
// https://en.wikipedia.org/wiki/Median_of_medians
|
// Implemented pseudocode from https://en.wikipedia.org/wiki/Median_of_medians
|
||||||
// https://oneraynyday.github.io/algorithms/2016/06/17/Median-Of-Medians/
|
|
||||||
// https://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time/
|
|
||||||
|
|
||||||
uint32_t findMedian(std::vector<uint32_t> values)
|
uint32_t pivot(std::vector<uint32_t> &v, uint32_t left, uint32_t right);
|
||||||
{
|
uint32_t partition(std::vector<uint32_t> &v, uint32_t left, uint32_t right, uint32_t pivotIndex, uint32_t n);
|
||||||
return values[(values.size() / 2)];
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t findMedianOfMedians(std::vector<std::vector<uint32_t> > values)
|
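// Selection loop: rearranges v[left..right] and returns index n once v[n] holds the value that sorting this range would place there; pivot() supplies the index of an element which is close to (but likely not exactly) the median.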
|
||||||
{
|
uint32_t findMedianOfMedians(std::vector<uint32_t> &v, uint32_t left, uint32_t right, uint32_t n) {
|
||||||
std::vector<uint32_t> medians;
|
while (true) {
|
||||||
for (size_t i = 0; i < values.size(); i++) {
|
if (left == right) {
|
||||||
uint32_t m = findMedian(values[i]);
|
return left;
|
||||||
medians.push_back(m);
|
}
|
||||||
}
|
|
||||||
return findMedian(medians);
|
uint32_t pivotIndex = pivot(v, left, right);
|
||||||
}
|
pivotIndex = partition(v, left, right, pivotIndex, n);
|
||||||
|
|
||||||
uint32_t getMedianOfMedians(const std::vector<uint32_t> values, uint32_t k)
|
if (n == pivotIndex) {
|
||||||
{
|
return n;
|
||||||
// Divide the list into n/5 lists of 5 elements each
|
} else if (n < pivotIndex) {
|
||||||
std::vector<std::vector<uint32_t> > vec2D;
|
right = pivotIndex - 1;
|
||||||
size_t count = 0;
|
} else {
|
||||||
while (count != values.size()) {
|
left = pivotIndex + 1;
|
||||||
size_t countRow = 0;
|
|
||||||
std::vector<uint32_t> row;
|
|
||||||
while ((countRow < 5) && (count < values.size()))
|
|
||||||
{
|
|
||||||
row.push_back(values[count]);
|
|
||||||
count++;
|
|
||||||
countRow++;
|
|
||||||
}
|
}
|
||||||
vec2D.push_back(row);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Calculating a new pivot for making splits
|
uint32_t partition(std::vector<uint32_t> &v, uint32_t left, uint32_t right, uint32_t pivotIndex, uint32_t n) {
|
||||||
uint32_t m = findMedianOfMedians(vec2D);
|
uint32_t pivotValue = v[pivotIndex];
|
||||||
|
|
||||||
// Partition the list into unique elements larger than 'm' (call this sublist L1) and those smaller than 'm' (call this sublist L2)
|
std::swap(v[pivotIndex], v[right]);
|
||||||
std::vector<uint32_t> L1, L2;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < vec2D.size(); i++)
|
uint32_t storeIndex = left;
|
||||||
{
|
|
||||||
for (size_t j = 0; j < vec2D[i].size(); j++)
|
// Move all elements smaller than the pivot to the left of the pivot
|
||||||
{
|
for (uint32_t i = left; i < right; i++) {
|
||||||
if (vec2D[i][j] > m)
|
if (v[i] < pivotValue) {
|
||||||
{
|
std::swap(v[storeIndex], v[i]);
|
||||||
L1.push_back(vec2D[i][j]);
|
storeIndex++;
|
||||||
}
|
|
||||||
else if (vec2D[i][j] < m)
|
|
||||||
{
|
|
||||||
L2.push_back(vec2D[i][j]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (k <= L1.size())
|
// Move all elements equal to the pivot right after
|
||||||
{
|
// the smaller elements
|
||||||
return getMedianOfMedians(L1, k);
|
uint32_t storeIndexEq = storeIndex;
|
||||||
}
|
|
||||||
else if (k > (L1.size() + 1))
|
|
||||||
{
|
|
||||||
return getMedianOfMedians(L2, k - ((int)L1.size()) - 1);
|
|
||||||
}
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A simple function to find median of arr[].
|
for (uint32_t i = storeIndex; i < right; i++) {
|
||||||
// This is called only for an array of size 5 in this program.
|
if (v[i] == pivotValue) {
|
||||||
uint32_t findMedian(uint32_t arr[], int n)
|
std::swap(v[storeIndexEq], v[i]);
|
||||||
{
|
storeIndexEq++;
|
||||||
std::sort(arr, arr + n); // Sort the array
|
|
||||||
return arr[n / 2]; // Return middle element
|
|
||||||
}
|
|
||||||
|
|
||||||
// searches for x in arr[l..r], and partitions the array around x
|
|
||||||
int partition(uint32_t arr[], int l, int r, uint32_t pivotValue)
|
|
||||||
{
|
|
||||||
// Search for x in arr[l..r] and move it to end
|
|
||||||
int i;
|
|
||||||
for (i = l; i < r; i++)
|
|
||||||
if (arr[i] == pivotValue)
|
|
||||||
break;
|
|
||||||
swap(&arr[i], &arr[r]);
|
|
||||||
|
|
||||||
// Standard partition algorithm
|
|
||||||
i = l;
|
|
||||||
for (int j = l; j < r; j++)
|
|
||||||
{
|
|
||||||
if (arr[j] <= pivotValue)
|
|
||||||
{
|
|
||||||
i++;
|
|
||||||
swap(&arr[i], &arr[j]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
swap(&arr[i], &arr[r]);
|
|
||||||
return i;
|
std::swap(v[right], v[storeIndexEq]);
|
||||||
|
|
||||||
|
if (n < storeIndex) {
|
||||||
|
return storeIndex;
|
||||||
|
}
|
||||||
|
if (n <= storeIndexEq) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
return storeIndexEq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns k'th smallest element in arr[l..r] in worst case
|
uint32_t partition5(std::vector<uint32_t> &v, uint32_t left, uint32_t right) {
|
||||||
// linear time. ASSUMPTION: ALL ELEMENTS IN ARR[] ARE DISTINCT
|
uint32_t i = left + 1;
|
||||||
//int getMedianOfMedians(int arr[], int l, int r, int k)
|
|
||||||
uint32_t getMedianOfMedians(uint32_t* arr, int l, int r, int k)
|
|
||||||
{
|
|
||||||
int n = r - l + 1; // Number of elements in arr[l..r]
|
|
||||||
|
|
||||||
// Divide arr[] in groups of size 5, calculate median
|
while (i <= right) {
|
||||||
// of every group and store it in median[] array.
|
uint32_t j = i;
|
||||||
// There will be floor((n + 4) / 5) groups;
|
|
||||||
//int median[(n + 4) / 5]; // non VS compliant!
|
while (j > left && v[j - 1] > v[j]) {
|
||||||
uint32_t* median = new uint32_t[(n + 4) / 5];
|
std::swap(v[j - 1], v[j]);
|
||||||
int i = 0;
|
j = j - 1;
|
||||||
for (i = 0; i < n / 5; i++)
|
}
|
||||||
median[i] = findMedian(arr + l + i * 5, 5);
|
|
||||||
if (i * 5 < n) //For last group with less than 5 elements
|
i = i + 1;
|
||||||
{
|
|
||||||
median[i] = findMedian(arr + l + i * 5, n % 5);
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find median of all medians using recursive call.
|
return (left + right) / 2;
|
||||||
// If median[] has only one element, then no need for recursive call
|
|
||||||
uint32_t medOfMed = (i == 1) ? median[0] : getMedianOfMedians(median, 0, i - 1, i / 2);
|
|
||||||
|
|
||||||
// Partition the array around a random element and
|
|
||||||
// get position of pivot element in sorted array
|
|
||||||
int pos = partition(arr, l, r, medOfMed);
|
|
||||||
|
|
||||||
if (pos - l == k - 1) return arr[pos];
|
|
||||||
else if (pos - l > k - 1)
|
|
||||||
return getMedianOfMedians(arr, l, pos - 1, k);
|
|
||||||
else return getMedianOfMedians(arr, pos + 1, r, k - pos + l - 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t pivot(std::vector<uint32_t> &v, uint32_t left, uint32_t right) {
|
||||||
|
// for 5 or fewer elements just get the median
|
||||||
|
if (right - left < 5) {
|
||||||
|
return partition5(v, left, right);
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise move the medians of five-element subgroups to the first n/5 positions
|
||||||
|
for (uint32_t i = left; i <= right; i += 5) {
|
||||||
|
// get the median position of the i'th five-element subgroup
|
||||||
|
uint32_t subRight = i + 4;
|
||||||
|
|
||||||
|
if (subRight > right) {
|
||||||
|
subRight = right;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t median5 = partition5(v, i, subRight);
|
||||||
|
std::swap(v[median5], v[left + (i - left) / 5]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// compute the median of the n/5 medians-of-five
|
||||||
|
uint32_t mid = (right - left) / 10 + left + 1;
|
||||||
|
return findMedianOfMedians(v, left, left + (right - left) / 5, mid);
|
||||||
|
}
|
6
main.cpp
6
main.cpp
@ -67,8 +67,9 @@ int main(int argc, char** argv)
|
|||||||
Timing::getInstance()->stopRecord("randomized select");
|
Timing::getInstance()->stopRecord("randomized select");
|
||||||
|
|
||||||
// ein weiterer Median - Algorithmus aus der Literatur - implemented with std::vector
|
// ein weiterer Median - Algorithmus aus der Literatur - implemented with std::vector
|
||||||
|
std::vector<uint32_t> mom_numbers = std::vector<uint32_t>(numbers);
|
||||||
Timing::getInstance()->startRecord("vector median of medians");
|
Timing::getInstance()->startRecord("vector median of medians");
|
||||||
std::cout << "vector median of medians: " << getMedianOfMedians(numbers, idxMed + 1) << std::endl;
|
std::cout << "vector median of medians: " << mom_numbers[findMedianOfMedians(mom_numbers, 0, numbers.size() - 1, idxMed + 1)] << std::endl;
|
||||||
Timing::getInstance()->stopRecord("vector median of medians");
|
Timing::getInstance()->stopRecord("vector median of medians");
|
||||||
|
|
||||||
// ein weiterer Median - Algorithmus aus der Literatur - realized with array
|
// ein weiterer Median - Algorithmus aus der Literatur - realized with array
|
||||||
@ -78,9 +79,8 @@ int main(int argc, char** argv)
|
|||||||
Timing::getInstance()->stopRecord("array median of medians");*/
|
Timing::getInstance()->stopRecord("array median of medians");*/
|
||||||
|
|
||||||
// noch ein weiterer Median - Algorithmus weil wir so cool sind
|
// noch ein weiterer Median - Algorithmus weil wir so cool sind
|
||||||
std::vector<uint32_t> numbers_wirth(numbers); // Copy because wirth works in-place
|
|
||||||
Timing::getInstance()->startRecord("wirth");
|
Timing::getInstance()->startRecord("wirth");
|
||||||
std::cout << "wirth kth element: " << getWirthKthSmallest(numbers_wirth, idxMed) << std::endl;
|
std::cout << "wirth kth element: " << getWirthKthSmallest(numbers, idxMed) << std::endl;
|
||||||
Timing::getInstance()->stopRecord("wirth");
|
Timing::getInstance()->stopRecord("wirth");
|
||||||
|
|
||||||
// Verwendung des C++ STL function templates nth_element
|
// Verwendung des C++ STL function templates nth_element
|
||||||
|
Loading…
x
Reference in New Issue
Block a user