From 4894d8c1ed3ba0acb1fa5abc4cac63bcc172735c Mon Sep 17 00:00:00 2001 From: karl Date: Sun, 18 Oct 2020 11:23:47 +0200 Subject: [PATCH] Reimplement median of medians --- MedianOfMedians.h | 198 +++++++++++++++++++--------------------------- main.cpp | 6 +- 2 files changed, 86 insertions(+), 118 deletions(-) diff --git a/MedianOfMedians.h b/MedianOfMedians.h index c05d9b6..3283386 100644 --- a/MedianOfMedians.h +++ b/MedianOfMedians.h @@ -1,136 +1,104 @@ #pragma once -// https://en.wikipedia.org/wiki/Median_of_medians -// https://oneraynyday.github.io/algorithms/2016/06/17/Median-Of-Medians/ -// https://www.geeksforgeeks.org/kth-smallestlargest-element-unsorted-array-set-3-worst-case-linear-time/ +// Implemented pseudocode from https://en.wikipedia.org/wiki/Median_of_medians -uint32_t findMedian(std::vector values) -{ - return values[(values.size() / 2)]; -} +uint32_t pivot(std::vector &v, uint32_t left, uint32_t right); +uint32_t partition(std::vector &v, uint32_t left, uint32_t right, uint32_t pivotIndex, uint32_t n); -uint32_t findMedianOfMedians(std::vector > values) -{ - std::vector medians; - for (size_t i = 0; i < values.size(); i++) { - uint32_t m = findMedian(values[i]); - medians.push_back(m); - } - return findMedian(medians); -} - -uint32_t getMedianOfMedians(const std::vector values, uint32_t k) -{ - // Divide the list into n/5 lists of 5 elements each - std::vector > vec2D; - size_t count = 0; - while (count != values.size()) { - size_t countRow = 0; - std::vector row; - while ((countRow < 5) && (count < values.size())) - { - row.push_back(values[count]); - count++; - countRow++; +// Return the index of an element which is close to (but likely not exactly) the median. +uint32_t findMedianOfMedians(std::vector &v, uint32_t left, uint32_t right, uint32_t n) { + while (true) { + if (left == right) { + return left; + } + + uint32_t pivotIndex = pivot(v, left, right); + pivotIndex = partition(v, left, right, pivotIndex, n); + + if (n == pivotIndex) { + return n; + } else if (n < pivotIndex) { + right = pivotIndex - 1; + } else { + left = pivotIndex + 1; } - vec2D.push_back(row); } +} - // Calculating a new pivot for making splits - uint32_t m = findMedianOfMedians(vec2D); +uint32_t partition(std::vector &v, uint32_t left, uint32_t right, uint32_t pivotIndex, uint32_t n) { + uint32_t pivotValue = v[pivotIndex]; - // Partition the list into unique elements larger than 'm' (call this sublist L1) and those smaller them 'm' (call this sublist L2) - std::vector L1, L2; + std::swap(v[pivotIndex], v[right]); - for (size_t i = 0; i < vec2D.size(); i++) - { - for (size_t j = 0; j < vec2D[i].size(); j++) - { - if (vec2D[i][j] > m) - { - L1.push_back(vec2D[i][j]); - } - else if (vec2D[i][j] < m) - { - L2.push_back(vec2D[i][j]); - } + uint32_t storeIndex = left; + + // Move all elements smaller than the pivot to the left of the pivot + for (uint32_t i = left; i < right; i++) { + if (v[i] < pivotValue) { + std::swap(v[storeIndex], v[i]); + storeIndex++; } } - if (k <= L1.size()) - { - return getMedianOfMedians(L1, k); - } - else if (k > (L1.size() + 1)) - { - return getMedianOfMedians(L2, k - ((int)L1.size()) - 1); - } - return m; -} + // Move all elements equal to the pivot right after + // the smaller elements + uint32_t storeIndexEq = storeIndex; -// A simple function to find median of arr[]. -// This is called only for an array of size 5 in this program. -uint32_t findMedian(uint32_t arr[], int n) -{ - std::sort(arr, arr + n); // Sort the array - return arr[n / 2]; // Return middle element -} - -// searches for x in arr[l..r], and partitions the array around x -int partition(uint32_t arr[], int l, int r, uint32_t pivotValue) -{ - // Search for x in arr[l..r] and move it to end - int i; - for (i = l; i < r; i++) - if (arr[i] == pivotValue) - break; - swap(&arr[i], &arr[r]); - - // Standard partition algorithm - i = l; - for (int j = l; j < r; j++) - { - if (arr[j] <= pivotValue) - { - i++; - swap(&arr[i], &arr[j]); + for (uint32_t i = storeIndex; i < right; i++) { + if (v[i] == pivotValue) { + std::swap(v[storeIndexEq], v[i]); + storeIndexEq++; } } - swap(&arr[i], &arr[r]); - return i; + + std::swap(v[right], v[storeIndexEq]); + + if (n < storeIndex) { + return storeIndex; + } + if (n <= storeIndexEq) { + return n; + } + return storeIndexEq; } -// Returns k'th smallest element in arr[l..r] in worst case -// linear time. ASSUMPTION: ALL ELEMENTS IN ARR[] ARE DISTINCT -//int getMedianOfMedians(int arr[], int l, int r, int k) -uint32_t getMedianOfMedians(uint32_t* arr, int l, int r, int k) -{ - int n = r - l + 1; // Number of elements in arr[l..r] +uint32_t partition5(std::vector &v, uint32_t left, uint32_t right) { + uint32_t i = left + 1; - // Divide arr[] in groups of size 5, calculate median - // of every group and store it in median[] array. - // There will be floor((n + 4) / 5) groups; - //int median[(n + 4) / 5]; // non VS compliant! - uint32_t* median = new uint32_t[(n + 4) / 5]; - int i = 0; - for (i = 0; i < n / 5; i++) - median[i] = findMedian(arr + l + i * 5, 5); - if (i * 5 < n) //For last group with less than 5 elements - { - median[i] = findMedian(arr + l + i * 5, n % 5); - i++; + while (i <= right) { + uint32_t j = i; + + while (j > left && v[j - 1] > v[j]) { + std::swap(v[j - 1], v[j]); + j = j - 1; + } + + i = i + 1; } - // Find median of all medians using recursive call. - // If median[] has only one element, then no need for recursive call - uint32_t medOfMed = (i == 1) ? median[0] : getMedianOfMedians(median, 0, i - 1, i / 2); - - // Partition the array around a random element and - // get position of pivot element in sorted array - int pos = partition(arr, l, r, medOfMed); - - if (pos - l == k - 1) return arr[pos]; - else if (pos - l > k - 1) - return getMedianOfMedians(arr, l, pos - 1, k); - else return getMedianOfMedians(arr, pos + 1, r, k - pos + l - 1); + return (left + right) / 2; } + +uint32_t pivot(std::vector &v, uint32_t left, uint32_t right) { + // for 5 or less elements just get median + if (right - left < 5) { + return partition5(v, left, right); + } + + // otherwise move the medians of five-element subgroups to the first n/5 positions + for (uint32_t i = left; i <= right; i += 5) { + // get the median position of the i'th five-element subgroup + uint32_t subRight = i + 4; + + if (subRight > right) { + subRight = right; + } + + uint32_t median5 = partition5(v, i, subRight); + std::swap(v[median5], v[left + (i - left) / 5]); + } + + // compute the median of the n/5 medians-of-five + uint32_t mid = (right - left) / 10 + left + 1; + return findMedianOfMedians(v, left, left + (right - left) / 5, mid); +} \ No newline at end of file diff --git a/main.cpp b/main.cpp index c14ad49..9247f40 100644 --- a/main.cpp +++ b/main.cpp @@ -67,8 +67,9 @@ int main(int argc, char** argv) Timing::getInstance()->stopRecord("randomized select"); // ein weiterer Median - Algorithmus aus der Literatur - implemented with std::vector + std::vector mom_numbers = std::vector(numbers); Timing::getInstance()->startRecord("vector median of medians"); - std::cout << "vector median of medians: " << getMedianOfMedians(numbers, idxMed + 1) << std::endl; + std::cout << "vector median of medians: " << mom_numbers[findMedianOfMedians(mom_numbers, 0, numbers.size() - 1, idxMed + 1)] << std::endl; Timing::getInstance()->stopRecord("vector median of medians"); // ein weiterer Median - Algorithmus aus der Literatur - realized with array @@ -78,9 +79,8 @@ int main(int argc, char** argv) Timing::getInstance()->stopRecord("array median of medians");*/ // noch ein ein weiterer Median - Algorithmus weil wir so cool sind - std::vector numbers_wirth(numbers); // Copy because wirth works in-place Timing::getInstance()->startRecord("wirth"); - std::cout << "wirth kth element: " << getWirthKthSmallest(numbers_wirth, idxMed) << std::endl; + std::cout << "wirth kth element: " << getWirthKthSmallest(numbers, idxMed) << std::endl; Timing::getInstance()->stopRecord("wirth"); // Verwendung des C++ STL function templates nth_element