Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Contents of /trunk/src/sorting.c
Parent Directory
|
Revision Log
Revision 1020 -
(show annotations)
(download)
(as text)
Sun Jul 15 13:47:19 2012 UTC (12 years, 8 months ago) by amb
File MIME type: text/x-csrc
File size: 23517 byte(s)
Sun Jul 15 13:47:19 2012 UTC (12 years, 8 months ago) by amb
File MIME type: text/x-csrc
File size: 23517 byte(s)
Don't call any of the pthread functions unless running with multiple threads.
1 | /*************************************** |
2 | Merge sort functions. |
3 | |
4 | Part of the Routino routing software. |
5 | ******************/ /****************** |
6 | This file Copyright 2009-2012 Andrew M. Bishop |
7 | |
8 | This program is free software: you can redistribute it and/or modify |
9 | it under the terms of the GNU Affero General Public License as published by |
10 | the Free Software Foundation, either version 3 of the License, or |
11 | (at your option) any later version. |
12 | |
13 | This program is distributed in the hope that it will be useful, |
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | GNU Affero General Public License for more details. |
17 | |
18 | You should have received a copy of the GNU Affero General Public License |
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
20 | ***************************************/ |
21 | |
22 | |
23 | #include <stdio.h> |
24 | #include <stdlib.h> |
25 | #include <string.h> |
26 | #include <assert.h> |
27 | |
28 | #if defined(USE_PTHREADS) && USE_PTHREADS |
29 | #include <pthread.h> |
30 | #endif |
31 | |
32 | #include "types.h" |
33 | |
34 | #include "files.h" |
35 | #include "sorting.h" |
36 | |
37 | |
38 | /* Global variables */ |
39 | |
40 | /*+ The command line '--tmpdir' option or its default value. +*/ |
41 | extern char *option_tmpdirname; |
42 | |
43 | /*+ The amount of RAM to use for filesorting. +*/ |
44 | extern size_t option_filesort_ramsize; |
45 | |
46 | /*+ The number of filesorting threads allowed. +*/ |
47 | extern int option_filesort_threads; |
48 | |
49 | |
50 | /* Thread data type definitions */ |
51 | |
52 | /*+ A data type for holding data for a thread. +*/ |
53 | typedef struct _thread_data |
54 | { |
55 | pthread_t thread; /*+ The thread identifier. +*/ |
56 | |
57 | int running; /*+ A flag indicating the current state of the thread. +*/ |
58 | |
59 | void *data; /*+ The main data array. +*/ |
60 | void **datap; /*+ An array of pointers to the data objects. +*/ |
61 | size_t n; /*+ The number of pointers. +*/ |
62 | |
63 | char *filename; /*+ The name of the file to write the results to. +*/ |
64 | |
65 | size_t itemsize; /*+ The size of each item. +*/ |
66 | int (*compare)(const void*,const void*); /*+ The comparison function. +*/ |
67 | } |
68 | thread_data; |
69 | |
70 | /* Thread variables */ |
71 | |
72 | #if defined(USE_PTHREADS) && USE_PTHREADS |
73 | |
74 | static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER; |
75 | static pthread_cond_t running_cond = PTHREAD_COND_INITIALIZER; |
76 | |
77 | #endif |
78 | |
79 | /* Thread helper functions */ |
80 | |
81 | static void *filesort_fixed_heapsort_thread(thread_data *thread); |
82 | static void *filesort_vary_heapsort_thread(thread_data *thread); |
83 | |
84 | |
85 | /*++++++++++++++++++++++++++++++++++++++ |
86 | A function to sort the contents of a file of fixed length objects using a |
87 | limited amount of RAM. |
88 | |
89 | The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort |
90 | and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting. |
91 | The individual sort steps and the merge step both use a "Heap sort" |
92 | http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well |
93 | if the data is already partially sorted. |
94 | |
95 | index_t filesort_fixed Returns the number of objects kept. |
96 | |
97 | int fd_in The file descriptor of the input file (opened for reading and at the beginning). |
98 | |
99 | int fd_out The file descriptor of the output file (opened for writing and empty). |
100 | |
101 | size_t itemsize The size of each item in the file that needs sorting. |
102 | |
103 | int (*compare)(const void*, const void*) The comparison function (identical to qsort if the |
104 | data to be sorted is an array of things not pointers). |
105 | |
106 | int (*keep)(void *,index_t) If non-NULL then this function is called for each item, if it |
107 | returns 1 then the object is kept and written to the output file. |
108 | ++++++++++++++++++++++++++++++++++++++*/ |
109 | |
110 | index_t filesort_fixed(int fd_in,int fd_out,size_t itemsize,int (*compare)(const void*,const void*),int (*keep)(void*,index_t)) |
111 | { |
112 | int *fds=NULL,*heap=NULL; |
113 | int nfiles=0,ndata=0; |
114 | index_t count=0,total=0; |
115 | size_t nitems=option_filesort_ramsize/(option_filesort_threads*(itemsize+sizeof(void*))); |
116 | void *data,**datap; |
117 | thread_data *threads; |
118 | int i,more=1; |
119 | #if defined(USE_PTHREADS) && USE_PTHREADS |
120 | int nthreads=0; |
121 | #endif |
122 | |
123 | /* Allocate the RAM buffer and other bits */ |
124 | |
125 | threads=(thread_data*)malloc(option_filesort_threads*sizeof(thread_data)); |
126 | |
127 | for(i=0;i<option_filesort_threads;i++) |
128 | { |
129 | threads[i].running=0; |
130 | |
131 | threads[i].data=malloc(nitems*itemsize); |
132 | threads[i].datap=malloc(nitems*sizeof(void*)); |
133 | |
134 | threads[i].filename=(char*)malloc(strlen(option_tmpdirname)+24); |
135 | |
136 | threads[i].itemsize=itemsize; |
137 | threads[i].compare=compare; |
138 | } |
139 | |
140 | /* Loop around, fill the buffer, sort the data and write a temporary file */ |
141 | |
142 | do |
143 | { |
144 | int thread=0; |
145 | |
146 | #if defined(USE_PTHREADS) && USE_PTHREADS |
147 | |
148 | if(option_filesort_threads>1) |
149 | { |
150 | /* Find a spare slot (one *must* be unused at all times) */ |
151 | |
152 | pthread_mutex_lock(&running_mutex); |
153 | |
154 | for(thread=0;thread<option_filesort_threads;thread++) |
155 | if(!threads[thread].running) |
156 | break; |
157 | |
158 | pthread_mutex_unlock(&running_mutex); |
159 | } |
160 | |
161 | #endif |
162 | |
163 | /* Read in the data and create pointers */ |
164 | |
165 | for(i=0;i<nitems;i++) |
166 | { |
167 | threads[thread].datap[i]=threads[thread].data+i*itemsize; |
168 | |
169 | if(ReadFile(fd_in,threads[thread].datap[i],itemsize)) |
170 | { |
171 | more=0; |
172 | break; |
173 | } |
174 | |
175 | total++; |
176 | } |
177 | |
178 | threads[thread].n=i; |
179 | |
180 | /* Shortcut if there is no previous data and no more data (i.e. no data at all) */ |
181 | |
182 | if(more==0 && total==0) |
183 | goto tidy_and_exit; |
184 | |
185 | /* No new data read in this time round */ |
186 | |
187 | if(threads[thread].n==0) |
188 | break; |
189 | |
190 | /* Sort the data pointers using a heap sort (potentially in a thread) */ |
191 | |
192 | sprintf(threads[thread].filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles); |
193 | |
194 | #if defined(USE_PTHREADS) && USE_PTHREADS |
195 | |
196 | if(option_filesort_threads>1) |
197 | { |
198 | pthread_mutex_lock(&running_mutex); |
199 | |
200 | while(nthreads==(option_filesort_threads-1)) |
201 | { |
202 | for(i=0;i<option_filesort_threads;i++) |
203 | if(threads[i].running==2) |
204 | { |
205 | pthread_join(threads[i].thread,NULL); |
206 | threads[i].running=0; |
207 | nthreads--; |
208 | } |
209 | |
210 | if(nthreads==(option_filesort_threads-1)) |
211 | pthread_cond_wait(&running_cond,&running_mutex); |
212 | } |
213 | |
214 | threads[thread].running=1; |
215 | |
216 | pthread_mutex_unlock(&running_mutex); |
217 | |
218 | pthread_create(&threads[thread].thread,NULL,(void* (*)(void*))filesort_fixed_heapsort_thread,&threads[thread]); |
219 | |
220 | nthreads++; |
221 | } |
222 | else |
223 | |
224 | #else |
225 | |
226 | filesort_fixed_heapsort_thread(&threads[thread]); |
227 | |
228 | #endif |
229 | |
230 | nfiles++; |
231 | } |
232 | while(more); |
233 | |
234 | /* Wait for all of the threads to finish */ |
235 | |
236 | #if defined(USE_PTHREADS) && USE_PTHREADS |
237 | |
238 | while(option_filesort_threads>1 && nthreads) |
239 | { |
240 | pthread_mutex_lock(&running_mutex); |
241 | |
242 | pthread_cond_wait(&running_cond,&running_mutex); |
243 | |
244 | for(i=0;i<option_filesort_threads;i++) |
245 | if(threads[i].running==2) |
246 | { |
247 | pthread_join(threads[i].thread,NULL); |
248 | threads[i].running=0; |
249 | nthreads--; |
250 | } |
251 | |
252 | pthread_mutex_unlock(&running_mutex); |
253 | } |
254 | |
255 | #endif |
256 | |
257 | /* Shortcut if only one file, lucky for us we still have the data in RAM) */ |
258 | |
259 | if(nfiles==1) |
260 | { |
261 | for(i=0;i<threads[0].n;i++) |
262 | { |
263 | if(!keep || keep(threads[0].datap[i],count)) |
264 | { |
265 | WriteFile(fd_out,threads[0].datap[i],itemsize); |
266 | count++; |
267 | } |
268 | } |
269 | |
270 | DeleteFile(threads[0].filename); |
271 | |
272 | goto tidy_and_exit; |
273 | } |
274 | |
275 | /* Check that number of files is less than file size */ |
276 | |
277 | assert(nfiles<nitems); |
278 | |
279 | /* Open all of the temporary files */ |
280 | |
281 | fds=(int*)malloc(nfiles*sizeof(int)); |
282 | |
283 | for(i=0;i<nfiles;i++) |
284 | { |
285 | char *filename=threads[0].filename; |
286 | |
287 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i); |
288 | |
289 | fds[i]=ReOpenFile(filename); |
290 | |
291 | DeleteFile(filename); |
292 | } |
293 | |
294 | /* Perform an n-way merge using a binary heap */ |
295 | |
296 | heap=(int*)malloc((1+nfiles)*sizeof(int)); |
297 | |
298 | data =threads[0].data; |
299 | datap=threads[0].datap; |
300 | |
301 | /* Fill the heap to start with */ |
302 | |
303 | for(i=0;i<nfiles;i++) |
304 | { |
305 | int index; |
306 | |
307 | datap[i]=data+i*itemsize; |
308 | |
309 | ReadFile(fds[i],datap[i],itemsize); |
310 | |
311 | index=i+1; |
312 | |
313 | heap[index]=i; |
314 | |
315 | /* Bubble up the new value */ |
316 | |
317 | while(index>1) |
318 | { |
319 | int newindex; |
320 | int temp; |
321 | |
322 | newindex=index/2; |
323 | |
324 | if(compare(datap[heap[index]],datap[heap[newindex]])>=0) |
325 | break; |
326 | |
327 | temp=heap[index]; |
328 | heap[index]=heap[newindex]; |
329 | heap[newindex]=temp; |
330 | |
331 | index=newindex; |
332 | } |
333 | } |
334 | |
335 | /* Repeatedly pull out the root of the heap and refill from the same file */ |
336 | |
337 | ndata=nfiles; |
338 | |
339 | do |
340 | { |
341 | int index=1; |
342 | |
343 | if(!keep || keep(datap[heap[index]],count)) |
344 | { |
345 | WriteFile(fd_out,datap[heap[index]],itemsize); |
346 | count++; |
347 | } |
348 | |
349 | if(ReadFile(fds[heap[index]],datap[heap[index]],itemsize)) |
350 | { |
351 | heap[index]=heap[ndata]; |
352 | ndata--; |
353 | } |
354 | |
355 | /* Bubble down the new value */ |
356 | |
357 | while((2*index)<ndata) |
358 | { |
359 | int newindex; |
360 | int temp; |
361 | |
362 | newindex=2*index; |
363 | |
364 | if(compare(datap[heap[newindex]],datap[heap[newindex+1]])>=0) |
365 | newindex=newindex+1; |
366 | |
367 | if(compare(datap[heap[index]],datap[heap[newindex]])<=0) |
368 | break; |
369 | |
370 | temp=heap[newindex]; |
371 | heap[newindex]=heap[index]; |
372 | heap[index]=temp; |
373 | |
374 | index=newindex; |
375 | } |
376 | |
377 | if((2*index)==ndata) |
378 | { |
379 | int newindex; |
380 | int temp; |
381 | |
382 | newindex=2*index; |
383 | |
384 | if(compare(datap[heap[index]],datap[heap[newindex]])<=0) |
385 | ; /* break */ |
386 | else |
387 | { |
388 | temp=heap[newindex]; |
389 | heap[newindex]=heap[index]; |
390 | heap[index]=temp; |
391 | } |
392 | } |
393 | } |
394 | while(ndata>0); |
395 | |
396 | /* Tidy up */ |
397 | |
398 | tidy_and_exit: |
399 | |
400 | if(fds) |
401 | { |
402 | for(i=0;i<nfiles;i++) |
403 | CloseFile(fds[i]); |
404 | free(fds); |
405 | } |
406 | |
407 | if(heap) |
408 | free(heap); |
409 | |
410 | for(i=0;i<option_filesort_threads;i++) |
411 | { |
412 | free(threads[i].data); |
413 | free(threads[i].datap); |
414 | |
415 | free(threads[i].filename); |
416 | } |
417 | |
418 | return(count); |
419 | } |
420 | |
421 | |
422 | /*++++++++++++++++++++++++++++++++++++++ |
423 | A function to sort the contents of a file of variable length objects (each |
424 | preceded by its length in FILESORT_VARSIZE bytes) using a limited amount of RAM. |
425 | |
426 | The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort |
427 | and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting. |
428 | The individual sort steps and the merge step both use a "Heap sort" |
429 | http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well |
430 | if the data is already partially sorted. |
431 | |
432 | index_t filesort_vary Returns the number of objects kept. |
433 | |
434 | int fd_in The file descriptor of the input file (opened for reading and at the beginning). |
435 | |
436 | int fd_out The file descriptor of the output file (opened for writing and empty). |
437 | |
438 | int (*compare)(const void*, const void*) The comparison function (identical to qsort if the |
439 | data to be sorted is an array of things not pointers). |
440 | |
441 | int (*keep)(void *,index_t) If non-NULL then this function is called for each item, if it |
442 | returns 1 then the object is kept and written to the output file. |
443 | ++++++++++++++++++++++++++++++++++++++*/ |
444 | |
445 | index_t filesort_vary(int fd_in,int fd_out,int (*compare)(const void*,const void*),int (*keep)(void*,index_t)) |
446 | { |
447 | int *fds=NULL,*heap=NULL; |
448 | int nfiles=0,ndata=0; |
449 | index_t count=0,total=0; |
450 | size_t datasize=option_filesort_ramsize/option_filesort_threads; |
451 | FILESORT_VARINT nextitemsize,largestitemsize=0; |
452 | void *data,**datap; |
453 | thread_data *threads; |
454 | int i,more=1; |
455 | #if defined(USE_PTHREADS) && USE_PTHREADS |
456 | int nthreads=0; |
457 | #endif |
458 | |
459 | /* Allocate the RAM buffer and other bits */ |
460 | |
461 | threads=(thread_data*)malloc(option_filesort_threads*sizeof(thread_data)); |
462 | |
463 | for(i=0;i<option_filesort_threads;i++) |
464 | { |
465 | threads[i].running=0; |
466 | |
467 | threads[i].data=malloc(datasize); |
468 | threads[i].datap=NULL; |
469 | |
470 | threads[i].filename=(char*)malloc(strlen(option_tmpdirname)+24); |
471 | |
472 | threads[i].compare=compare; |
473 | } |
474 | |
475 | /* Loop around, fill the buffer, sort the data and write a temporary file */ |
476 | |
477 | if(ReadFile(fd_in,&nextitemsize,FILESORT_VARSIZE)) /* Always have the next item size known in advance */ |
478 | goto tidy_and_exit; |
479 | |
480 | do |
481 | { |
482 | size_t ramused=FILESORT_VARALIGN-FILESORT_VARSIZE; |
483 | int thread=0; |
484 | |
485 | #if defined(USE_PTHREADS) && USE_PTHREADS |
486 | |
487 | if(option_filesort_threads>1) |
488 | { |
489 | /* Find a spare slot (one *must* be unused at all times) */ |
490 | |
491 | pthread_mutex_lock(&running_mutex); |
492 | |
493 | for(thread=0;thread<option_filesort_threads;thread++) |
494 | if(!threads[thread].running) |
495 | break; |
496 | |
497 | pthread_mutex_unlock(&running_mutex); |
498 | } |
499 | |
500 | #endif |
501 | |
502 | threads[thread].datap=threads[thread].data+datasize; |
503 | |
504 | threads[thread].n=0; |
505 | |
506 | /* Read in the data and create pointers */ |
507 | |
508 | while((ramused+FILESORT_VARSIZE+nextitemsize)<=((void*)threads[thread].datap-sizeof(void*)-threads[thread].data)) |
509 | { |
510 | FILESORT_VARINT itemsize=nextitemsize; |
511 | |
512 | if(itemsize>largestitemsize) |
513 | largestitemsize=itemsize; |
514 | |
515 | *(FILESORT_VARINT*)(threads[thread].data+ramused)=itemsize; |
516 | |
517 | ramused+=FILESORT_VARSIZE; |
518 | |
519 | ReadFile(fd_in,threads[thread].data+ramused,itemsize); |
520 | |
521 | *--threads[thread].datap=threads[thread].data+ramused; /* points to real data */ |
522 | |
523 | ramused+=itemsize; |
524 | |
525 | ramused =FILESORT_VARALIGN*((ramused+FILESORT_VARSIZE-1)/FILESORT_VARALIGN); |
526 | ramused+=FILESORT_VARALIGN-FILESORT_VARSIZE; |
527 | |
528 | total++; |
529 | threads[thread].n++; |
530 | |
531 | if(ReadFile(fd_in,&nextitemsize,FILESORT_VARSIZE)) |
532 | { |
533 | more=0; |
534 | break; |
535 | } |
536 | } |
537 | |
538 | /* No new data read in this time round */ |
539 | |
540 | if(threads[thread].n==0) |
541 | break; |
542 | |
543 | /* Sort the data pointers using a heap sort (potentially in a thread) */ |
544 | |
545 | sprintf(threads[thread].filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles); |
546 | |
547 | #if defined(USE_PTHREADS) && USE_PTHREADS |
548 | |
549 | if(option_filesort_threads>1) |
550 | { |
551 | pthread_mutex_lock(&running_mutex); |
552 | |
553 | while(nthreads==(option_filesort_threads-1)) |
554 | { |
555 | for(i=0;i<option_filesort_threads;i++) |
556 | if(threads[i].running==2) |
557 | { |
558 | pthread_join(threads[i].thread,NULL); |
559 | threads[i].running=0; |
560 | nthreads--; |
561 | } |
562 | |
563 | if(nthreads==(option_filesort_threads-1)) |
564 | pthread_cond_wait(&running_cond,&running_mutex); |
565 | } |
566 | |
567 | threads[thread].running=1; |
568 | |
569 | pthread_mutex_unlock(&running_mutex); |
570 | |
571 | pthread_create(&threads[thread].thread,NULL,(void* (*)(void*))filesort_vary_heapsort_thread,&threads[thread]); |
572 | |
573 | nthreads++; |
574 | } |
575 | else |
576 | |
577 | #else |
578 | |
579 | filesort_vary_heapsort_thread(&threads[thread]); |
580 | |
581 | #endif |
582 | |
583 | nfiles++; |
584 | } |
585 | while(more); |
586 | |
587 | /* Wait for all of the threads to finish */ |
588 | |
589 | #if defined(USE_PTHREADS) && USE_PTHREADS |
590 | |
591 | while(option_filesort_threads>1 && nthreads) |
592 | { |
593 | pthread_mutex_lock(&running_mutex); |
594 | |
595 | pthread_cond_wait(&running_cond,&running_mutex); |
596 | |
597 | for(i=0;i<option_filesort_threads;i++) |
598 | if(threads[i].running==2) |
599 | { |
600 | pthread_join(threads[i].thread,NULL); |
601 | threads[i].running=0; |
602 | nthreads--; |
603 | } |
604 | |
605 | pthread_mutex_unlock(&running_mutex); |
606 | } |
607 | |
608 | #endif |
609 | |
610 | /* Shortcut if only one file, lucky for us we still have the data in RAM) */ |
611 | |
612 | if(nfiles==1) |
613 | { |
614 | for(i=0;i<threads[0].n;i++) |
615 | { |
616 | if(!keep || keep(threads[0].datap[i],count)) |
617 | { |
618 | FILESORT_VARINT itemsize=*(FILESORT_VARINT*)(threads[0].datap[i]-FILESORT_VARSIZE); |
619 | |
620 | WriteFile(fd_out,threads[0].datap[i]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE); |
621 | count++; |
622 | } |
623 | } |
624 | |
625 | DeleteFile(threads[0].filename); |
626 | |
627 | goto tidy_and_exit; |
628 | } |
629 | |
630 | /* Check that number of files is less than file size */ |
631 | |
632 | largestitemsize=FILESORT_VARALIGN*(1+(largestitemsize+FILESORT_VARALIGN-FILESORT_VARSIZE)/FILESORT_VARALIGN); |
633 | |
634 | assert(nfiles<((datasize-nfiles*sizeof(void*))/largestitemsize)); |
635 | |
636 | /* Open all of the temporary files */ |
637 | |
638 | fds=(int*)malloc(nfiles*sizeof(int)); |
639 | |
640 | for(i=0;i<nfiles;i++) |
641 | { |
642 | char *filename=threads[0].filename; |
643 | |
644 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i); |
645 | |
646 | fds[i]=ReOpenFile(filename); |
647 | |
648 | DeleteFile(filename); |
649 | } |
650 | |
651 | /* Perform an n-way merge using a binary heap */ |
652 | |
653 | heap=(int*)malloc((1+nfiles)*sizeof(int)); |
654 | |
655 | data=threads[0].data; |
656 | datap=data+datasize-nfiles*sizeof(void*); |
657 | |
658 | /* Fill the heap to start with */ |
659 | |
660 | for(i=0;i<nfiles;i++) |
661 | { |
662 | int index; |
663 | FILESORT_VARINT itemsize; |
664 | |
665 | datap[i]=data+FILESORT_VARALIGN-FILESORT_VARSIZE+i*largestitemsize; |
666 | |
667 | ReadFile(fds[i],&itemsize,FILESORT_VARSIZE); |
668 | |
669 | *(FILESORT_VARINT*)(datap[i]-FILESORT_VARSIZE)=itemsize; |
670 | |
671 | ReadFile(fds[i],datap[i],itemsize); |
672 | |
673 | index=i+1; |
674 | |
675 | heap[index]=i; |
676 | |
677 | /* Bubble up the new value */ |
678 | |
679 | while(index>1) |
680 | { |
681 | int newindex; |
682 | int temp; |
683 | |
684 | newindex=index/2; |
685 | |
686 | if(compare(datap[heap[index]],datap[heap[newindex]])>=0) |
687 | break; |
688 | |
689 | temp=heap[index]; |
690 | heap[index]=heap[newindex]; |
691 | heap[newindex]=temp; |
692 | |
693 | index=newindex; |
694 | } |
695 | } |
696 | |
697 | /* Repeatedly pull out the root of the heap and refill from the same file */ |
698 | |
699 | ndata=nfiles; |
700 | |
701 | do |
702 | { |
703 | int index=1; |
704 | FILESORT_VARINT itemsize; |
705 | |
706 | if(!keep || keep(datap[heap[index]],count)) |
707 | { |
708 | itemsize=*(FILESORT_VARINT*)(datap[heap[index]]-FILESORT_VARSIZE); |
709 | |
710 | WriteFile(fd_out,datap[heap[index]]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE); |
711 | count++; |
712 | } |
713 | |
714 | if(ReadFile(fds[heap[index]],&itemsize,FILESORT_VARSIZE)) |
715 | { |
716 | heap[index]=heap[ndata]; |
717 | ndata--; |
718 | } |
719 | else |
720 | { |
721 | *(FILESORT_VARINT*)(datap[heap[index]]-FILESORT_VARSIZE)=itemsize; |
722 | |
723 | ReadFile(fds[heap[index]],datap[heap[index]],itemsize); |
724 | } |
725 | |
726 | /* Bubble down the new value */ |
727 | |
728 | while((2*index)<ndata) |
729 | { |
730 | int newindex; |
731 | int temp; |
732 | |
733 | newindex=2*index; |
734 | |
735 | if(compare(datap[heap[newindex]],datap[heap[newindex+1]])>=0) |
736 | newindex=newindex+1; |
737 | |
738 | if(compare(datap[heap[index]],datap[heap[newindex]])<=0) |
739 | break; |
740 | |
741 | temp=heap[newindex]; |
742 | heap[newindex]=heap[index]; |
743 | heap[index]=temp; |
744 | |
745 | index=newindex; |
746 | } |
747 | |
748 | if((2*index)==ndata) |
749 | { |
750 | int newindex; |
751 | int temp; |
752 | |
753 | newindex=2*index; |
754 | |
755 | if(compare(datap[heap[index]],datap[heap[newindex]])<=0) |
756 | ; /* break */ |
757 | else |
758 | { |
759 | temp=heap[newindex]; |
760 | heap[newindex]=heap[index]; |
761 | heap[index]=temp; |
762 | } |
763 | } |
764 | } |
765 | while(ndata>0); |
766 | |
767 | /* Tidy up */ |
768 | |
769 | tidy_and_exit: |
770 | |
771 | if(fds) |
772 | { |
773 | for(i=0;i<nfiles;i++) |
774 | CloseFile(fds[i]); |
775 | free(fds); |
776 | } |
777 | |
778 | if(heap) |
779 | free(heap); |
780 | |
781 | for(i=0;i<option_filesort_threads;i++) |
782 | { |
783 | free(threads[i].data); |
784 | |
785 | free(threads[i].filename); |
786 | } |
787 | |
788 | return(count); |
789 | } |
790 | |
791 | |
792 | /*++++++++++++++++++++++++++++++++++++++ |
793 | A wrapper function that can be run in a thread for fixed data. |
794 | |
795 | void *filesort_fixed_heapsort_thread Returns NULL (required to return void*). |
796 | |
797 | thread_data *thread The data to be processed in this thread. |
798 | ++++++++++++++++++++++++++++++++++++++*/ |
799 | |
800 | static void *filesort_fixed_heapsort_thread(thread_data *thread) |
801 | { |
802 | int fd,i; |
803 | |
804 | /* Sort the data pointers using a heap sort */ |
805 | |
806 | filesort_heapsort(thread->datap,thread->n,thread->compare); |
807 | |
808 | /* Create a temporary file and write the result */ |
809 | |
810 | fd=OpenFileNew(thread->filename); |
811 | |
812 | for(i=0;i<thread->n;i++) |
813 | WriteFile(fd,thread->datap[i],thread->itemsize); |
814 | |
815 | CloseFile(fd); |
816 | |
817 | #if defined(USE_PTHREADS) && USE_PTHREADS |
818 | |
819 | if(option_filesort_threads>1) |
820 | { |
821 | pthread_mutex_lock(&running_mutex); |
822 | |
823 | thread->running=2; |
824 | |
825 | pthread_cond_signal(&running_cond); |
826 | |
827 | pthread_mutex_unlock(&running_mutex); |
828 | } |
829 | |
830 | #endif |
831 | |
832 | return(NULL); |
833 | } |
834 | |
835 | |
836 | /*++++++++++++++++++++++++++++++++++++++ |
837 | A wrapper function that can be run in a thread for variable data. |
838 | |
839 | void *filesort_vary_heapsort_thread Returns NULL (required to return void*). |
840 | |
841 | thread_data *thread The data to be processed in this thread. |
842 | ++++++++++++++++++++++++++++++++++++++*/ |
843 | |
844 | static void *filesort_vary_heapsort_thread(thread_data *thread) |
845 | { |
846 | int fd,i; |
847 | |
848 | /* Sort the data pointers using a heap sort */ |
849 | |
850 | filesort_heapsort(thread->datap,thread->n,thread->compare); |
851 | |
852 | /* Create a temporary file and write the result */ |
853 | |
854 | fd=OpenFileNew(thread->filename); |
855 | |
856 | for(i=0;i<thread->n;i++) |
857 | { |
858 | FILESORT_VARINT itemsize=*(FILESORT_VARINT*)(thread->datap[i]-FILESORT_VARSIZE); |
859 | |
860 | WriteFile(fd,thread->datap[i]-FILESORT_VARSIZE,itemsize+FILESORT_VARSIZE); |
861 | } |
862 | |
863 | CloseFile(fd); |
864 | |
865 | #if defined(USE_PTHREADS) && USE_PTHREADS |
866 | |
867 | if(option_filesort_threads>1) |
868 | { |
869 | pthread_mutex_lock(&running_mutex); |
870 | |
871 | thread->running=2; |
872 | |
873 | pthread_cond_signal(&running_cond); |
874 | |
875 | pthread_mutex_unlock(&running_mutex); |
876 | } |
877 | |
878 | #endif |
879 | |
880 | return(NULL); |
881 | } |
882 | |
883 | |
884 | /*++++++++++++++++++++++++++++++++++++++ |
885 | A function to sort an array of pointers efficiently. |
886 | |
887 | The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort, |
888 | in particular, this is good because it can operate in-place and doesn't |
889 | allocate more memory like using qsort() does. |
890 | |
891 | void **datap A pointer to the array of pointers to sort. |
892 | |
893 | size_t nitems The number of items of data to sort. |
894 | |
895 | int(*compare)(const void *, const void *) The comparison function (identical to qsort if the |
896 | data to be sorted was an array of things not pointers). |
897 | ++++++++++++++++++++++++++++++++++++++*/ |
898 | |
899 | void filesort_heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*)) |
900 | { |
901 | void **datap1=&datap[-1]; |
902 | int i; |
903 | |
904 | /* Fill the heap by pretending to insert the data that is already there */ |
905 | |
906 | for(i=2;i<=nitems;i++) |
907 | { |
908 | int index=i; |
909 | |
910 | /* Bubble up the new value (upside-down, put largest at top) */ |
911 | |
912 | while(index>1) |
913 | { |
914 | int newindex; |
915 | void *temp; |
916 | |
917 | newindex=index/2; |
918 | |
919 | if(compare(datap1[index],datap1[newindex])<=0) /* reversed compared to filesort_fixed() above */ |
920 | break; |
921 | |
922 | temp=datap1[index]; |
923 | datap1[index]=datap1[newindex]; |
924 | datap1[newindex]=temp; |
925 | |
926 | index=newindex; |
927 | } |
928 | } |
929 | |
930 | /* Repeatedly pull out the root of the heap and swap with the bottom item */ |
931 | |
932 | for(i=nitems;i>1;i--) |
933 | { |
934 | int index=1; |
935 | void *temp; |
936 | |
937 | temp=datap1[index]; |
938 | datap1[index]=datap1[i]; |
939 | datap1[i]=temp; |
940 | |
941 | /* Bubble down the new value (upside-down, put largest at top) */ |
942 | |
943 | while((2*index)<(i-1)) |
944 | { |
945 | int newindex; |
946 | void *temp; |
947 | |
948 | newindex=2*index; |
949 | |
950 | if(compare(datap1[newindex],datap1[newindex+1])<=0) /* reversed compared to filesort_fixed() above */ |
951 | newindex=newindex+1; |
952 | |
953 | if(compare(datap1[index],datap1[newindex])>=0) /* reversed compared to filesort_fixed() above */ |
954 | break; |
955 | |
956 | temp=datap1[newindex]; |
957 | datap1[newindex]=datap1[index]; |
958 | datap1[index]=temp; |
959 | |
960 | index=newindex; |
961 | } |
962 | |
963 | if((2*index)==(i-1)) |
964 | { |
965 | int newindex; |
966 | void *temp; |
967 | |
968 | newindex=2*index; |
969 | |
970 | if(compare(datap1[index],datap1[newindex])>=0) /* reversed compared to filesort_fixed() above */ |
971 | ; /* break */ |
972 | else |
973 | { |
974 | temp=datap1[newindex]; |
975 | datap1[newindex]=datap1[index]; |
976 | datap1[index]=temp; |
977 | } |
978 | } |
979 | } |
980 | } |
Properties
Name | Value |
---|---|
cvs:description | Functions to perform sorting. |