0. 数据结构
本文中用到的数据结构定义为
template <typename DataType>
struct LNode {
DataType data;
LNode<DataType>* next;
LNode() : next(nullptr) { memset(&data, 0, sizeof(DataType)); };
LNode(DataType val) : data(val), next(nullptr) {};
LNode(DataType val, LNode<DataType>* p) : data(val), next(p) {};
};
template <typename DataType>
using LinkedList = LNode<DataType>*;
template <typename DataType, typename ListNodeType>
ListNodeType* createSingleList(vector<DataType> initData, bool hasHead = true, bool isCyclic = false)
{
ListNodeType* list = new ListNodeType;
ListNodeType* newNode = nullptr;
if (!hasHead) {
list->data = initData.at(0);
}
if (isCyclic) {
list->next = list;
}
for (size_t index = 0; index < ((hasHead) ? initData.size() : initData.size() - 1); ++index) {
newNode = new ListNodeType(initData.at(initData.size() - 1 - index), list->next);
list->next = newNode;
}
return list;
}
1. 插入排序
1.1. 直接插入排序
void insertSort(int* list, size_t size)
{
for (size_t index = 0; index < size; ++index) {
size_t jndex = 0;
int cache = list[index];
/* find inserting pos */
while (jndex < index) {
if (list[index] > list[jndex]) {
++jndex;
}
else {
break;
}
}
/* shift */
for (size_t kndex = index; kndex > jndex; --kndex) {
list[kndex] = list[kndex - 1];
}
/* insert */
list[jndex] = cache;
}
return;
}
1.2. 二路插入排序
void insertSort_2part(int* list, size_t size)
{
if (size == 1) {
return;
}
// now head and tail will not be identical when sorted;
size_t head = 0;
size_t tail = 0;
int* cache = new int[size];
// init;
cache[0] = list[0];
// sorting;
for (size_t index = 1; index < size; ++index) {
if (list[index] <= cache[head]) {
head = (head + size - 1) % size; // avoid overflow;
cache[head] = list[index];
}
else if (list[index] >= cache[tail]) {
++tail; // tail will never overflow;
cache[tail] = list[index];
}
else {
// find insert pos;
size_t jndex = 0;
for (jndex = head; cache[jndex] < list[index]; jndex = (jndex + 1) % size);
// shift;
++tail;
for (size_t kndex = tail; kndex != jndex; kndex = (kndex + size - 1) % size) {
cache[kndex] = cache[(kndex + size - 1) % size];
}
// insert;
cache[jndex] = list[index];
}
}
// copy;
for (size_t index = head; index != tail; index = (index + 1) % size) {
list[(index + size - head) % size] = cache[index];
}
return;
}
2. 交换排序
2.1. 冒泡排序
void bubbleSort(int* list, size_t size)
{
for (size_t index = size - 1; index > 0; --index) {
for (size_t jndex = 0; jndex < index; ++jndex) {
if (list[jndex] > list[jndex + 1]) {
list[jndex] += list[jndex + 1];
list[jndex + 1] = list[jndex] - list[jndex + 1];
list[jndex] -= list[jndex + 1];
}
}
}
return;
}
2.2. 快速排序
2.2.1. 从数据中随机选取枢轴量
该算法要求枢轴量必须为数据集中已存在的量, 因为递归的依据是每次划分将枢轴量放到正确的位置上.
/* qkSort.h */
#ifndef QKSORT_H_
#define QKSORT_H_
namespace QKSORT_
{
template <typename DataType = int>
static size_t partition(DataType* arr, size_t head, size_t tail, bool (*isnotlt)(DataType, DataType));
template <typename DataType = int>
static void quickSort(DataType* arr, size_t head, size_t tail, bool (*isnotlt)(DataType, DataType));
template <typename DataType = int>
void qksort(DataType* arr, size_t size, bool (*isnotlt)(DataType, DataType) = [](DataType left, DataType right) -> bool { return left >= right; });
}
// The definition and declaration
// of template functions or
// functions in template classes
// must be in the same file;
#include "qkSort.hpp"
#endif
/* qkSort.hpp */
#ifdef QKSORT_H_
#include <iostream>
namespace QKSORT_
{
template <typename DataType>
size_t partition(DataType* arr, size_t head, size_t tail, bool (*isnotlt)(DataType, DataType))
{
// randomly choose the pivot to optimize run time;
size_t pivotInd = head + (size_t)((tail - head) * (rand() / (double)RAND_MAX));
// swap the pivot element to the head position;
DataType pivot = arr[pivotInd];
arr[pivotInd] = arr[head];
arr[head] = pivot;
// partitioning;
while (head < tail) {
while (head < tail && isnotlt(arr[tail], pivot)) {
--tail;
}
arr[head] = arr[tail];
while (head < tail && isnotlt(pivot, arr[head])) {
++head;
}
arr[tail] = arr[head];
}
arr[head] = pivot;
return head;
}
template <typename DataType>
void quickSort(DataType* arr, size_t head, size_t tail, bool (*isnotlt)(DataType, DataType))
{
if (head < tail) {
size_t pivotInd = partition<DataType>(arr, head, tail, isnotlt);
// deal with the front part;
if (head + 1 < pivotInd) { // to avoid size_t overflow;
quickSort<DataType>(arr, head, pivotInd - 1, isnotlt);
}
// deal with the rear part;
if (pivotInd < tail - 1) { // to avoid size_t overflow;
quickSort<DataType>(arr, pivotInd + 1, tail, isnotlt);
}
}
return;
}
template <typename DataType>
void qksort(DataType* arr, size_t size, bool (*isnotlt)(DataType, DataType))
{ // quick sort API;
quickSort<DataType>(arr, 0, size - 1, isnotlt);
return;
}
}
#endif
注意: 由于快排的不稳定性, 实际业务使用随机枢轴量容易导致 bug 无法复现的问题, 可以考虑三点取中或九点取中. 将待取值放入数组局部排序输出中间值即可. 以三点取中为例:
template <typename DataType>
static size_t findMidInd(DataType* list, size_t head, size_t tail)
{
DataType arr[3] = { list[head], list[tail], list[head + (tail - head) / 2] };
size_t ind[3] = { head, tail, head + (tail - head) / 2 };
for (size_t index = 3; index > 0; --index) {
for (size_t jndex = 0; jndex < index - 1; ++jndex) {
if (arr[jndex] > arr[jndex + 1]) {
arr[jndex] += arr[jndex + 1];
arr[jndex + 1] = arr[jndex] - arr[jndex + 1];
arr[jndex] -= arr[jndex + 1];
ind[jndex] += ind[jndex + 1];
ind[jndex + 1] = ind[jndex] - ind[jndex + 1];
ind[jndex] -= ind[jndex + 1];
}
}
}
return ind[1];
}
2.2.2. 任意选取枢轴量
该算法允许枢轴量不为数据集中的值(如可选取平均值为枢轴量), 但需为数据集最大值和最小值(包括端点)之间的值.
void partition(int* list, size_t& head, size_t& tail)
{
// 特定的枢轴量取法可能导致2个数据分划时无限循环;
// 故需单独处理 head + 1 == tail 的情况;
if (head + 1 == tail) {
if (list[head] > list[tail]) {
list[head] += list[tail];
list[tail] = list[head] - list[tail];
list[head] -= list[tail];
}
head += tail;
tail = head - tail;
head -= tail;
return;
}
// 待分划数据至少为3个时;
int pivot = getPivot();
while (head < tail) {
while (head <= tail && list[tail] > pivot) {
--tail;
}
while (head <= tail && list[head] < pivot) {
++head;
}
if (head < tail) {
int cache = list[head];
list[head] = list[tail];
list[tail] = cache;
--tail;
++head;
}
// 当探针在端点重合时, 判定重合值分配到哪一半数据中;
if (head == tail) {
if (list[head] < pivot) {
++head;
}
else {
--tail;
}
}
}
return;
}
void quickSort(int* list, size_t head, size_t tail)
{
if (head < tail) {
size_t backHead = head;
size_t frontTail = tail;
partition(list, backHead, frontTail);
if (head < frontTail) {
quickSort(list, head, frontTail);
}
if (backHead < tail) {
quickSort(list, backHead, tail);
}
}
return;
}
2.2.3. 非递归快速排序
void partition(int* list, size_t& head, size_t& tail)
{
// 单独处理 head + 1 == tail 的情况;
if (head + 1 == tail) {
if (list[head] > list[tail]) {
list[head] += list[tail];
list[tail] = list[head] - list[tail];
list[head] -= list[tail];
}
head += tail;
tail = head - tail;
head -= tail;
}
else {
int pivot = getPivot();
while (head < tail) {
while (head <= tail && list[tail] > pivot) {
--tail;
}
while (head <= tail && list[head] < pivot) {
++head;
}
if (head < tail) {
int cache = list[head];
list[head] = list[tail];
list[tail] = cache;
--tail;
++head;
}
if (head == tail) {
if (list[head] < pivot) {
++head;
}
else {
--tail;
}
}
}
}
return;
}
void quickSort(int* list, size_t n)
{
typedef struct stackNode {
size_t head = 0;
size_t tail = 0;
stackNode* next = nullptr;
} stackNode, *stack;
stack stk = new stackNode;
stk->next = new stackNode;
stk->next->head = 0;
stk->next->tail = n - 1;
size_t backHead = 0;
size_t frontTail = n - 1;
stackNode* newNode = nullptr;
stackNode* cache = nullptr;
while (stk->next != nullptr) {
// save top node;
cache = stk->next;
// pop top;
stk->next = stk->next->next;
backHead = cache->head;
frontTail = cache->tail;
partition(list, backHead, frontTail);
// 先右半再左半;
if (backHead < cache->tail) {
newNode = new stackNode;
newNode->head = backHead;
newNode->tail = cache->tail;
// push into stack;
newNode->next = stk->next;
stk->next = newNode;
}
if (cache->head < frontTail) {
newNode = new stackNode;
newNode->head = cache->head;
newNode->tail = frontTail;
// push into stack;
newNode->next = stk->next;
stk->next = newNode;
}
delete cache;
}
return;
}
2.2.4. 快速排序搜索第 k
小的元素
int qsFind(int* list, size_t head, size_t tail, size_t targetOrder)
{
if (head <= tail) {
size_t pivotInd = partition(list, head, tail);
if (pivotInd == targetOrder - 1) {
return list[pivotInd];
}
else if (pivotInd > targetOrder - 1 && pivotInd > head) {
return qsFind(list, head, pivotInd - 1, targetOrder);
}
else if (pivotInd < targetOrder - 1 && pivotInd < tail) {
return qsFind(list, pivotInd + 1, tail, targetOrder);
}
}
return 0;
}
2.2.5. 单链表的快速排序
本质上是按“小于枢轴”“枢轴”“大于枢轴”考虑的荷兰国旗问题, 用移动指针就可以解决. 如果不带头结点, 强行加一个就好了.
3. 选择排序
3.1. 堆排序
3.1.1. 堆排序
以大顶堆为例, 输出的顺序是降序输出, 但输出结果是升序序列. 首先自顶向下构建大顶堆:
// suppose list[0] to list[listLen - 2] is already heapified;
// now add new element list[listLen - 1] and heapify list;
// time complexity O(log listLen), space complexity O(1);
void heapify(int* list, size_t listLen)
{
if (listLen == 0) {
return;
}
// for maxheap, parent > anotherChild;
// so list[listLen] > parent is sufficient for exchanging;
size_t ind = listLen - 1;
while (ind != 0 && list[(ind - 1) / 2] < list[ind]) {
list[(ind - 1) / 2] += list[ind];
list[ind] = list[(ind - 1) / 2] - list[ind];
list[(ind - 1) / 2] -= list[ind];
ind = (ind - 1) / 2;
}
return;
}
void buildMaxHeap(int* list, size_t listLen)
{
for (size_t len = 1; len <= listLen; ++len) {
heapify(list, len);
}
return;
}
然后不断输出堆顶元素至堆尾, 并将剩余元素重建为大顶堆:
void heapSort(int* list, size_t listLen)
{
for (size_t index = 0; index < listLen; ++index) {
// (re)build maxheap;
buildMaxHeap(list, listLen - index);
// output local max element to the end of the heap;
int cache = list[0];
list[0] = list[listLen - index - 1];
list[listLen - index - 1] = cache;
}
return;
}
3.1.2. 优化建堆的过程
自底向上的建堆方式的时间复杂度更低(证明):
void heapify(int* list, size_t listLen, size_t root)
{
while (root < listLen) {
if (2 * root + 1 >= listLen) {
return;
}
size_t lchildInd = 2 * root + 1;
size_t rchildInd = lchildInd + 1;
size_t maxChildInd = (list[lchildInd] < list[(rchildInd < listLen) ? rchildInd : lchildInd]) ? rchildInd : lchildInd;
if (list[root] >= list[maxChildInd]) {
return;
}
list[root] += list[maxChildInd];
list[maxChildInd] = list[root] - list[maxChildInd];
list[root] -= list[maxChildInd];
root = maxChildInd;
}
return;
}
void buildMaxHeap(int* list, size_t listLen)
{
size_t index = listLen / 2; // the first leaf node;
do {
--index;
heapify(list, listLen, index);
} while (index != 0);
return;
}
注意到除第一次新建堆外, 重建堆的过程实际上只需移动新的堆顶元素, 故可以在重建时直接追踪新的堆顶. 此时如果仍用原来的 buildMaxHeap
, 其复杂度也会退化为 O(log listLen)
, 因为只有涉及到新堆顶元素节点时才需要交换.
最终得到的堆排序如下
void heapSort(int* list, size_t listLen)
{
// build maxheap;
buildMaxHeap(list, listLen);
for (size_t index = 0; index < listLen; ++index) {
// output local max element to the end of the heap;
int cache = list[0];
list[0] = list[listLen - index - 1];
list[listLen - index - 1] = cache;
heapify(list, listLen - index - 1, 0);
}
return;
}
3.1.3. 搜索第 k
小的元素
这里沿用3.1.1.节中的建堆过程 buildMaxHeap()
和3.1.2.节中的重建过程 rebuildMaxHeap()
.
// k: the function will find the k-th minimal element;
int hsFind(int* list, size_t listLen, size_t k)
{
if (listLen < k) {
throw std::bad_alloc();
}
buildMaxHeap(list, k);
for (size_t index = k; index < listLen; ++index) {
if (list[index] >= list[0]) {
continue;
}
int cache = list[0];
list[0] = list[index];
list[index] = cache;
heapify(list, k, 0);
}
return list[0];
}
4. 归并排序
4.1. 二路归并
4.2. 分段二路归并
该算法先将原数据集分为若干有序段, 再对这些数据段进行归并. 该实现是针对链表设计的.
LNode<int>** buildPList(LinkedList<int>& list, size_t& len)
{
if (len == 0) {
return nullptr;
}
LNode<int>* pre = list;
LNode<int>* mov = list->next;
LNode<int>** pList = new LNode<int>*[len];
len = 0;
pList[len] = mov;
pre = pre->next;
mov = mov->next;
++len;
while (mov != nullptr) {
if (pre->data > mov->data) {
pList[len] = mov;
++len;
}
pre = pre->next;
mov = mov->next;
}
return pList;
}
void mergeSort(LinkedList<int>& list, size_t n)
{
size_t len = n;
LNode<int>** pList = buildPList(list, len);
LNode<int>** cacheList = nullptr;
LNode<int>* head = nullptr;
LNode<int>* rear = nullptr;
size_t index = 0;
while (len != 1) {
index = 0;
cacheList = new LNode<int>*[len / 2 + len % 2];
while (2 * index + 1 < len) {
size_t jndex = 0;
size_t kndex = 0;
head = new LNode<int>;
rear = head;
LNode<int>* mov1 = pList[2 * index];
LNode<int>* mov2 = pList[2 * index + 1];
// merge pList[2 * index] and pList[2 * index + 1];
while (mov1 != nullptr && mov1 != pList[2 * index + 1] && mov2 != nullptr && mov2 != pList[2 * index + 2]) {
if (mov1->data <= mov2->data) {
rear->next = new LNode<int>;
rear->next->data = mov1->data;
mov1 = mov1->next;
rear = rear->next;
}
else {
rear->next = new LNode<int>;
rear->next->data = mov2->data;
mov2 = mov2->next;
rear = rear->next;
}
}
while (mov1 != nullptr && mov1 != pList[2 * index + 1]) {
rear->next = new LNode<int>;
rear->next->data = mov1->data;
mov1 = mov1->next;
rear = rear->next;
}
while (mov2 != nullptr && mov2 != pList[2 * index + 2]) {
rear->next = new LNode<int>;
rear->next->data = mov2->data;
mov2 = mov2->next;
rear = rear->next;
}
// update new section to cache;
cacheList[index] = head->next;
delete head;
++index;
}
if (2 * index < len) {
cacheList[index] = pList[2 * index];
}
// update len;
len = len / 2 + len % 2;
// update cache to pList;
delete[] pList;
pList = new LNode<int>*[len];
for (size_t qndex = 0; qndex < len; ++qndex) {
pList[qndex] = cacheList[qndex];
}
delete[] cacheList;
}
// update pList to list;
for (size_t qndex = 0; qndex < len; ++qndex) {
list[qndex] = pList[0][qndex];
}
delete[] pList;
return;
}
5. 基数排序
基数排序不依赖于比较和交换, 常用于链表排序.
// sortDigit: usually means the max number of digits an element can have;
// base: the base system in use;
void radixSort(LinkedList<unsigned int> list, size_t sortDigit, size_t base = 10)
{
// init buckets;
LinkedList<unsigned int>* buckets = new LinkedList<unsigned int>[base];
LNode<unsigned int>** rears = new LNode<unsigned int>*[base];
for (size_t index = 0; index < base; ++index) {
buckets[index] = new LNode<unsigned int>;
rears[index] = buckets[index];
}
size_t countSortedDigit = 0;
while (countSortedDigit < sortDigit) {
while (list->next != nullptr) {
// find the digit to be sorted;
size_t tarDigit = list->next->data;
for (size_t index = 0; index < countSortedDigit; ++index) {
tarDigit /= base;
}
tarDigit %= base;
// push into corresponding queue;
rears[tarDigit]->next = list->next;
list->next = list->next->next;
rears[tarDigit] = rears[tarDigit]->next;
rears[tarDigit]->next = nullptr;
}
// recombine list;
LNode<unsigned int>* rear = list;
for (size_t index = 0; index < base; ++index) {
rear->next = buckets[index]->next;
if (rear->next != nullptr) {
rear = rears[index];
}
}
// clear buckets and rears;
for (size_t index = 0; index < base; ++index) {
buckets[index]->next = nullptr;
rears[index] = buckets[index];
}
++countSortedDigit;
}
// del buckets and rears;
delete[] rears;
for (size_t index = 0; index < base; ++index) {
delete buckets[index];
}
delete[] buckets;
return;
}
附录
重要内容更新日志
2022-09-22 标准化了快速排序三点取中指标函数 findMidInd
.
2022-09-24 增加了堆排序并进行了简单优化, 利用新的堆排序模块重写了搜索第 k
小的元素的方法.
2022-09-25 增加了自底向上的堆排序, 并修改了其它相关方法的实现.
2022-09-29 增加了基数排序.