|
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include <malloc.h>
#include <memory.h>
// Capacity (in elements) of the working buffers: ten million values plus one.
#define MAX_SIZE (1000 * 10000 + 1)
// Index helpers for the array-backed binary heap.
// Every argument is parenthesized so that expressions such as PARENT(a + b)
// expand correctly.
// Note: with these formulas LEFT(0) == 0 and RIGHT(0) == 1, i.e. node 0 is its
// own "left child" and node 1 is its only real child.
#define PARENT(i) ((i) / 2)
#define RIGHT(i) ((i) * 2 + 1)
#define LEFT(i) ((i) * 2)
// Swap the lvalues a and b using t as scratch storage of a compatible type.
// a and b are each evaluated twice, so they must be free of side effects.
#define EXCHANGE(a, b, t) do { (t) = (a); (a) = (b); (b) = (t); } while (0)
// Write cnt pseudo-random integers, separated by single spaces, to the text
// file "test_data_<cnt>.txt" in the current directory.
//
// Each value is rand() % cnt, so it lies in [0, cnt). Values are not
// de-duplicated, so the file may contain repeats.
//
// cnt: number of values to write; requests with cnt >= MAX_SIZE are rejected
//      with a message on stdout and nothing is written.
void gen_test_data(uint32_t cnt)
{
    char file_name[256];
    FILE *fp = NULL;
    uint32_t n = 0;
    int write_failed = 0;

    if (cnt >= MAX_SIZE) {
        printf("cnt too largr\n");
        return;
    }

    // uint32_t is printed through unsigned/%u; %d would be a type mismatch.
    snprintf(file_name, sizeof file_name, "test_data_%u.txt", (unsigned)cnt);
    fp = fopen(file_name, "w");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    for (n = 0; n < cnt; ++n) {
        const uint32_t value = (uint32_t)rand() % cnt;
        fprintf(fp, "%u ", (unsigned)value);
    }

    // Buffered output may only fail at flush time, so check both the stream
    // error flag and the fclose() result before reporting success.
    write_failed = ferror(fp);
    if (fclose(fp) != 0 || write_failed) {
        printf("write %s error!\n", file_name);
        return;
    }
    printf("gen %s finished\n", file_name);
}
// Load whitespace-separated integers from "test_data_<data_cnt>.txt".
//
// arr:      destination buffer with room for at least `size` elements.
// size:     capacity of arr, in elements; reading stops once it is reached.
// cnt:      output; receives the number of values actually stored. It is set
//           to 0 up front, so it stays 0 when the request is rejected or the
//           file cannot be opened.
// data_cnt: selects the file name; rejected when larger than size.
void read_data(int32_t arr[],const uint32_t size,uint32_t *cnt,const uint32_t data_cnt)
{
    char file_name[256];
    FILE *fp = NULL;
    int value = 0;

    *cnt = 0;
    if (data_cnt > size) {
        printf("data_cnt too largr\n");
        return;
    }

    snprintf(file_name, sizeof file_name, "test_data_%u.txt", (unsigned)data_cnt);
    fp = fopen(file_name, "r");
    if (NULL == fp) {
        printf("open %s error!\n", file_name);
        return;
    }

    // Drive the loop by the fscanf() result rather than feof(): feof() only
    // becomes true after a read has already failed, which would count one
    // bogus element for an empty or malformed file.
    while (*cnt < size && fscanf(fp, "%d", &value) == 1) {
        arr[*cnt] = (int32_t)value;
        (*cnt)++;
    }
    fclose(fp);
}
// Quick sort: sorts arr[low..high] (both bounds inclusive) in place into
// DESCENDING order.
//
// The first element of the range is used as the pivot. Partitioning uses the
// "hole" scheme: the pivot is lifted out, elements are moved into the hole
// alternately from the right and left ends, and the pivot is dropped into the
// final hole.
//
// Only the smaller partition is handled recursively; the larger one is handled
// by the enclosing loop, which keeps the recursion depth logarithmic in the
// range length.
void quick_sort(int32_t arr[],int32_t low,int32_t high)
{
    while (low < high) {
        int32_t i = low;
        int32_t j = high;
        const int32_t pivot = arr[i];

        while (i < j) {
            // From the right, skip values that already belong on the right.
            while (i < j && arr[j] <= pivot) {
                j--;
            }
            if (i < j) {
                arr[i] = arr[j];
                i++;
            }
            // From the left, skip values that already belong on the left.
            while (i < j && arr[i] > pivot) {
                i++;
            }
            if (i < j) {
                arr[j] = arr[i];
                j--;
            }
        }
        arr[i] = pivot;

        if (i - low < high - i) {
            quick_sort(arr, low, i - 1);
            low = i + 1;
        } else {
            quick_sort(arr, i + 1, high);
            high = i - 1;
        }
    }
}
// Quickselect: rearranges arr[low..high] (inclusive) in place so that the
// topn largest values of that range occupy arr[low .. low+topn-1]. The order
// inside that leading group, and of the remaining elements, is unspecified.
//
// Each round partitions around the first element of the current range so that
// values >= pivot end up on the left and values < pivot on the right, then
// continues only in the side that still contains the boundary.
//
// Nothing is done when the range has fewer than two elements, when topn <= 0,
// or when topn already covers the whole range.
void get_topn_quick(int32_t arr[],int32_t low,int32_t high,const int32_t topn)
{
    int32_t want = topn;

    while (low < high && want > 0 && want < high - low + 1) {
        int32_t i = low;
        int32_t j = high;
        const int32_t pivot = arr[i];
        int32_t left_count = 0;

        while (i < j) {
            while (i < j && arr[j] < pivot) {
                j--;
            }
            if (i < j) {
                arr[i++] = arr[j];
            }
            while (i < j && arr[i] >= pivot) {
                i++;
            }
            if (i < j) {
                arr[j--] = arr[i];
            }
        }
        arr[i] = pivot;

        // Number of elements in [low..i]; all of them are >= everything right of i.
        left_count = i - low + 1;
        if (left_count == want) {
            return;
        }
        if (left_count > want) {
            // Too many on the left: the boundary lies strictly before the pivot.
            high = i - 1;
        } else {
            // The whole left group is selected; find the rest on the right.
            low = i + 1;
            want -= left_count;
        }
    }
}
// Sift the element at index i down until the max-heap property holds for the
// subtree rooted at i, assuming both child subtrees already satisfy it.
//
// arr:  heap storage.
// size: number of elements of arr that belong to the heap.
// i:    index of the node to sift down; child indices come from LEFT()/RIGHT().
void max_heapify(int32_t arr[],const uint32_t size,uint32_t i)
{
    for (;;) {
        const uint32_t left = LEFT(i);
        const uint32_t right = RIGHT(i);
        uint32_t largest = i;
        int32_t tmp = 0;  // same type as the elements being swapped

        if (left < size && arr[left] > arr[largest]) {
            largest = left;
        }
        if (right < size && arr[right] > arr[largest]) {
            largest = right;
        }
        if (largest == i) {
            return;  // node already dominates both children
        }

        EXCHANGE(arr[i], arr[largest], tmp);
        i = largest;  // keep sifting in the subtree that received the value
    }
}
// Sift the element at index i down until the min-heap property holds for the
// subtree rooted at i, assuming both child subtrees already satisfy it.
//
// arr:  heap storage.
// size: number of elements of arr that belong to the heap.
// i:    index of the node to sift down; child indices come from LEFT()/RIGHT().
void min_heapify(int32_t arr[],const uint32_t size,uint32_t i)
{
    for (;;) {
        const uint32_t left = LEFT(i);
        const uint32_t right = RIGHT(i);
        uint32_t smallest = i;
        int32_t tmp = 0;  // same type as the elements being swapped

        if (left < size && arr[left] < arr[smallest]) {
            smallest = left;
        }
        if (right < size && arr[right] < arr[smallest]) {
            smallest = right;
        }
        if (smallest == i) {
            return;  // node is already no larger than both children
        }

        EXCHANGE(arr[i], arr[smallest], tmp);
        i = smallest;  // keep sifting in the subtree that received the value
    }
}
// Select the topn largest values of arr[0..arr_size) using a min-heap and
// leave them in arr[0..topn) (in heap order, not sorted).
//
// The first topn elements are turned into a min-heap whose root arr[0] is the
// smallest candidate. Every remaining element that is larger than the root
// replaces it, and the heap is repaired.
//
// arr:      array to rearrange in place.
// arr_size: number of elements in arr; nothing is done when <= 0.
// topn:     how many of the largest values to collect; nothing is done when
//           <= 0, and values above arr_size are clamped to arr_size so the
//           heap never extends past the array.
void get_topn_heap(int32_t arr[], const int32_t arr_size, const int32_t topn)
{
    int32_t heap_size = topn;
    int32_t i = 0;
    int32_t tmp = 0;

    if (arr_size <= 0 || topn <= 0) {
        return;
    }
    if (heap_size > arr_size) {
        heap_size = arr_size;
    }

    // Build a min-heap (priority queue) over [0, heap_size).
    for (i = heap_size / 2; i >= 0; --i) {
        min_heapify(arr, (uint32_t)heap_size, (uint32_t)i);
    }

    for (i = heap_size; i < arr_size; ++i) {
        // Not larger than the current minimum: cannot belong to the top group.
        if (arr[i] <= arr[0]) {
            continue;
        }
        EXCHANGE(arr[0], arr[i], tmp);
        min_heapify(arr, (uint32_t)heap_size, 0);
    }
}
// Print the first cnt elements of arr on one line of stdout, each formatted
// with "%4d " and followed by a final newline.
void dump1(int32_t arr[],const uint32_t cnt)
{
    uint32_t i = 0;

    for (i = 0; i < cnt; ++i) {
        // Print the element itself; the cast makes the argument match %d.
        printf("%4d ", (int)arr[i]);
    }
    printf("\n");
}
// Print the elements arr[start..end) on one line of stdout, each formatted
// with "%5d " and followed by a final newline. Nothing but the newline is
// printed when start >= end.
void dump2(int32_t arr[],const uint32_t start,const uint32_t end)
{
    uint32_t i = 0;

    for (i = start; i < end; ++i) {
        // Print the element itself; the cast makes the argument match %d.
        printf("%5d ", (int)arr[i]);
    }
    printf("\n");
}
// Benchmark driver.
//
// 1. Generates test files holding 10, 100, ... values (every power of ten not
//    exceeding MAX_SIZE).
// 2. For each file, selects the largest cnt/10 values once with the min-heap
//    method and once with quickselect, printing the clock() ticks each took.
// 3. Sorts both selections and prints "OK" when they are identical.
//
// Returns 0 on success and 1 when the working buffers cannot be allocated.
int32_t main(int32_t argc, char *argv[])
{
    const size_t buf_bytes = sizeof(int32_t) * MAX_SIZE;
    int32_t *arr = (int32_t*)malloc(buf_bytes);
    int32_t *heap = (int32_t*)malloc(buf_bytes);
    int32_t *quick = (int32_t*)malloc(buf_bytes);
    uint32_t cnt = 0;
    uint32_t data_cnt = 0;

    (void)argc;
    (void)argv;

    if (NULL == arr || NULL == heap || NULL == quick) {
        fprintf(stderr, "out of memory\n");
        free(arr);
        free(heap);
        free(quick);
        return 1;
    }

    for (cnt = 10; cnt <= MAX_SIZE; cnt *= 10) {
        gen_test_data(cnt);
    }

    for (data_cnt = 10; data_cnt <= MAX_SIZE; data_cnt *= 10) {
        uint32_t topn = 0;
        size_t used_bytes = 0;
        clock_t start = 0;

        read_data(arr, MAX_SIZE, &cnt, data_cnt);
        printf("cnt=%u\n", (unsigned)cnt);

        // Fewer than 10 values (e.g. the file could not be read) gives an
        // empty selection; skip it instead of computing sizes from cnt/10 - 1,
        // which would wrap around in unsigned arithmetic.
        topn = cnt / 10;
        if (topn == 0) {
            continue;
        }
        // Only the cnt values that were actually read are meaningful.
        used_bytes = sizeof(int32_t) * cnt;

        memcpy(heap, arr, used_bytes);
        start = clock();
        get_topn_heap(heap, (int32_t)cnt, (int32_t)topn);
        printf("heap use time:%ld\n", (long)(clock() - start));
        // Sort the selected group so the two results can be compared bytewise.
        quick_sort(heap, 0, (int32_t)topn - 1);
        //dump2(heap,0,cnt/10);

        memcpy(quick, arr, used_bytes);
        start = clock();
        get_topn_quick(quick, 0, (int32_t)cnt - 1, (int32_t)topn);
        printf("quick use time:%ld\n", (long)(clock() - start));
        quick_sort(quick, 0, (int32_t)topn - 1);
        //dump2(quick,0,cnt/10);

        // Compare the complete selections (all topn elements).
        if (memcmp(heap, quick, sizeof(int32_t) * topn) == 0) {
            printf("OK\n");
        }
    }

    free(quick);
    free(heap);
    free(arr);
    return 0;
}
函数 get_topn_heap 使用最小堆查找数组 arr 中最大的 topn 个数字,并将它们放置在数组下标 [0, topn) 的位置(左闭右开)。
与前面的用快速排序的方法相比,用最小堆的方法效率稍低一些,快速排序方法:http://www.cnblogs.com/tangxin-blog/p/5617736.html
对比数据:
|
|