# 练习35：排序和搜索

``````#include <lcthw/darray_algos.h>
#include <stdlib.h>

int DArray_qsort(DArray *array, DArray_compare cmp)
{
qsort(array->contents, DArray_count(array), sizeof(void *), cmp);
return 0;
}

int DArray_heapsort(DArray *array, DArray_compare cmp)
{
return heapsort(array->contents, DArray_count(array), sizeof(void *), cmp);
}

int DArray_mergesort(DArray *array, DArray_compare cmp)
{
return mergesort(array->contents, DArray_count(array), sizeof(void *), cmp);
}
``````

``````#ifndef darray_algos_h
#define darray_algos_h

#include <lcthw/darray.h>

typedef int (*DArray_compare)(const void *a, const void *b);

int DArray_qsort(DArray *array, DArray_compare cmp);

int DArray_heapsort(DArray *array, DArray_compare cmp);

int DArray_mergesort(DArray *array, DArray_compare cmp);

#endif
``````

``````#include "minunit.h"
#include <lcthw/darray_algos.h>

int testcmp(char **a, char **b)
{
return strcmp(*a, *b);
}

DArray *create_words()
{
DArray *result = DArray_create(0, 5);
char *words[] = {"asdfasfd", "werwar", "13234", "asdfasfd", "oioj"};
int i = 0;

for(i = 0; i < 5; i++) {
DArray_push(result, words[i]);
}

return result;
}

int is_sorted(DArray *array)
{
int i = 0;

for(i = 0; i < DArray_count(array) - 1; i++) {
if(strcmp(DArray_get(array, i), DArray_get(array, i+1)) > 0) {
return 0;
}
}

return 1;
}

char *run_sort_test(int (*func)(DArray *, DArray_compare), const char *name)
{
DArray *words = create_words();
mu_assert(!is_sorted(words), "Words should start not sorted.");

debug("--- Testing %s sorting algorithm", name);
int rc = func(words, (DArray_compare)testcmp);
mu_assert(rc == 0, "sort failed");
mu_assert(is_sorted(words), "didn't sort it");

DArray_destroy(words);

return NULL;
}

char *test_qsort()
{
return run_sort_test(DArray_qsort, "qsort");
}

char *test_heapsort()
{
return run_sort_test(DArray_heapsort, "heapsort");
}

char *test_mergesort()
{
return run_sort_test(DArray_mergesort, "mergesort");
}

char * all_tests()
{
mu_suite_start();

mu_run_test(test_qsort);
mu_run_test(test_heapsort);
mu_run_test(test_mergesort);

return NULL;
}

RUN_TESTS(all_tests);
``````

## 基数排序和二分搜索

``````#ifndef _radixmap_h
#include <stdint.h>

typedef union RMElement {
uint64_t raw;
struct {
uint32_t key;
uint32_t value;
} data;
} RMElement;

size_t max;
size_t end;
uint32_t counter;
RMElement *contents;
RMElement *temp;

#endif
``````

## C联合体

``````#include <stdio.h>

typedef enum {
TYPE_INT,
TYPE_FLOAT,
TYPE_STRING,
} VariantType;

struct Variant {
VariantType type;
union {
int as_integer;
float as_float;
char *as_string;
} data;
};

typedef struct Variant Variant;

void Variant_print(Variant *var)
{
switch(var->type) {
case TYPE_INT:
printf("INT: %d\n", var->data.as_integer);
break;
case TYPE_FLOAT:
printf("FLOAT: %f\n", var->data.as_float);
break;
case TYPE_STRING:
printf("STRING: %s\n", var->data.as_string);
break;
default:
printf("UNKNOWN TYPE: %d", var->type);
}
}

int main(int argc, char *argv[])
{
Variant a_int = {.type = TYPE_INT, .data.as_integer = 100};
Variant a_float = {.type = TYPE_FLOAT, .data.as_float = 100.34};
Variant a_string = {.type = TYPE_STRING, .data.as_string = "YO DUDE!"};

Variant_print(&a_int);
Variant_print(&a_float);
Variant_print(&a_string);

// here's how you access them
a_int.data.as_integer = 200;
a_float.data.as_float = 2.345;
a_string.data.as_string = "Hi there.";

Variant_print(&a_int);
Variant_print(&a_float);
Variant_print(&a_string);

return 0;
}
``````

`radixmap.h`文件中我创建了`RMElement`联合体，用于在类型之间转换内存块。这里，我希望存储`uint64_t`定长整数用于排序目录，但是我也希望使用两个`uint32_t`用于表示数据的`key``value`对。通过使用联合体我就能够使用所需的两种不同方法来访问内存。

## 实现

``````/*
* Based on code by Andre Reinald then heavily modified by Zed A. Shaw.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <lcthw/dbg.h>

{
check_mem(map);

map->contents = calloc(sizeof(RMElement), max + 1);
check_mem(map->contents);

map->temp = calloc(sizeof(RMElement), max + 1);
check_mem(map->temp);

map->max = max;
map->end = 0;

return map;
error:
return NULL;
}

{
if(map) {
free(map->contents);
free(map->temp);
free(map);
}
}

#define ByteOf(x,y) (((uint8_t *)x)[(y)])

static inline void radix_sort(short offset, uint64_t max, uint64_t *source, uint64_t *dest)
{
uint64_t count[256] = {0};
uint64_t *cp = NULL;
uint64_t *sp = NULL;
uint64_t *end = NULL;
uint64_t s = 0;
uint64_t c = 0;

// count occurences of every byte value
for (sp = source, end = source + max; sp < end; sp++) {
count[ByteOf(sp, offset)]++;
}

// transform count into index by summing elements and storing into same array
for (s = 0, cp = count, end = count + 256; cp < end; cp++) {
c = *cp;
*cp = s;
s += c;
}

// fill dest with the right values in the right place
for (sp = source, end = source + max; sp < end; sp++) {
cp = count + ByteOf(sp, offset);
dest[*cp] = *sp;
++(*cp);
}
}

{
uint64_t *source = &map->contents[0].raw;
uint64_t *temp = &map->temp[0].raw;

}

{
int low = 0;
int high = map->end - 1;
RMElement *data = map->contents;

while (low <= high) {
int middle = low + (high - low)/2;
uint32_t key = data[middle].data.key;

if (to_find < key) {
high = middle - 1;
} else if (to_find > key) {
low = middle + 1;
} else {
return &data[middle];
}
}

return NULL;
}

{
check(key < UINT32_MAX, "Key can't be equal to UINT32_MAX.");

RMElement element = {.data = {.key = key, .value = value}};
check(map->end + 1 < map->max, "RadixMap is full.");

map->contents[map->end++] = element;

return 0;

error:
return -1;
}

{
check(map->end > 0, "There is nothing to delete.");
check(el != NULL, "Can't delete a NULL element.");

el->data.key = UINT32_MAX;

if(map->end > 1) {
// don't bother resorting a map of 1 length
}

map->end--;

return 0;
error:
return -1;
}
``````

``````#include "minunit.h"
#include <time.h>

{
size_t i = 0;

for (i = 0; i < map->max - 1; i++) {
uint32_t key = (uint32_t)(rand() | (rand() << 16));
}

return i;

error:
return 0;
}

{
RMElement d1, d2;
unsigned int i = 0;

// only signal errors if any (should not be)
for (i = 0; map->end > 0 && i < map->end-1; i++) {
d1 = map->contents[i];
d2 = map->contents[i+1];

if(d1.data.key > d2.data.key) {
debug("FAIL:i=%u, key: %u, value: %u, equals max? %d\n", i, d1.data.key, d1.data.value,
d2.data.key == UINT32_MAX);
return 0;
}
}

return 1;
}

{
unsigned i = 0;
RMElement *d = NULL;
RMElement *found = NULL;

for(i = map->end / 2; i < map->end; i++) {
d = &map->contents[i];
check(found != NULL, "Didn't find %u at %u.", d->data.key, i);
check(found->data.key == d->data.key, "Got the wrong result: %p:%u looking for %u at %u",
found, found->data.key, d->data.key, i);
}

return 1;
error:
return 0;
}

// test for big number of elements
static char *test_operations()
{
size_t N = 200;

mu_assert(map != NULL, "Failed to make the map.");
mu_assert(make_random(map), "Didn't make a random fake radix map.");

mu_assert(check_order(map), "Failed to properly sort the RadixMap.");

mu_assert(test_search(map), "Failed the search test.");
mu_assert(check_order(map), "RadixMap didn't stay sorted after search.");

while(map->end > 0) {
RMElement *el = RadixMap_find(map, map->contents[map->end / 2].data.key);
mu_assert(el != NULL, "Should get a result.");

size_t old_end = map->end;

mu_assert(RadixMap_delete(map, el) == 0, "Didn't delete it.");
mu_assert(old_end - 1 == map->end, "Wrong size after delete.");

// test that the end is now the old value, but uint32 max so it trails off
mu_assert(check_order(map), "RadixMap didn't stay sorted after delete.");
}

return NULL;
}

char *all_tests()
{
mu_suite_start();
srand(time(NULL));

mu_run_test(test_operations);

return NULL;
}

RUN_TESTS(all_tests);
``````

`radixmap.c`中的大多数操作都易于理解，如果你阅读代码的话。下面是每个基本函数作用及其工作原理的描述：

• 基于数组大小设置上界和下界。
• 获取上下界之间的中间元素。
• 如果键小于这个元素的值，就一定在它前面，所以上界设置为中间元素。
• 如果键大于这个元素的值，就一定在它后面，所以下界设置为中间元素。
• 继续循环直到上界和下界越过了彼此。如果退出了循环则没有找到。

• 按照它们的个位，将数字放入桶中：`[380, 100, 120], [912], [633, 223], [275]`
• 现在遍历每个桶中的数字，接着按十位排序：`[100], [912], [120, 223], [633], [275], [380]`
• 现在每个桶都包含了按照个位和十位排序后的数字。接着我需要按照这个顺序遍历，并把它们放入最后百位的桶中：`[100, 120], [223, 275], [380], [633], [912]`
• 到现在为止，每个数字都按照百位、十位和个位排序，并且如果我按照顺序遍历每个桶，我会得到最终排序的结果：`100, 120, 223, 275, 380, 633, 912`

## 如何改进

• 使用二分搜索来寻找新元素的最小位置，只对这个位置到微末之间进行排序。你需要找到它，将新元素放到末尾，之后对它们之间进行排序。大多数情况下这会显著地缩减排序范围。
• 跟踪当前所使用的最大的键，之后只对足够的位数进行排序，来处理这个键。你也可以跟踪最小的数值，之后只对范围中必要的字节进行排序。为了这样做，你需要关心CPU的整数存储顺序（大小端序）。

## 附加题

• 实现快速排序、堆排序和归并排序，并且提供一个`#define`让其他人在二者（标准库和你的实现）当中进行选择，或者创建另一套不同名称的函数。使用我教给你的技巧，阅读维基百科的算法页面，之后参照伪代码来实现它。
• 对比你的实现和标准库实现的性能。
• 使用这些排序函数创建`DArray_sort_add`，它可以向`DArray`添加元素，但是随后对数组排序。
• 编写`DArray_find`，使用`RadixMap_find`中的二分搜索算法和`DArray_compare`，来在有序的`DArray`中寻找元素。