Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Annotation of /trunk/src/sorting.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 269 - (hide annotations) (download) (as text)
Sun Oct 4 10:44:51 2009 UTC (15 years, 6 months ago) by amb
File MIME type: text/x-csrc
File size: 8865 byte(s)
Initial revision

1 amb 269 /***************************************
2     $Header: /home/amb/CVS/routino/src/sorting.c,v 1.1 2009-10-04 10:44:51 amb Exp $
3    
4     Merge sort functions.
5    
6     Part of the Routino routing software.
7     ******************/ /******************
8     This file Copyright 2009 Andrew M. Bishop
9    
10     This program is free software: you can redistribute it and/or modify
11     it under the terms of the GNU Affero General Public License as published by
12     the Free Software Foundation, either version 3 of the License, or
13     (at your option) any later version.
14    
15     This program is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18     GNU Affero General Public License for more details.
19    
20     You should have received a copy of the GNU Affero General Public License
21     along with this program. If not, see <http://www.gnu.org/licenses/>.
22     ***************************************/
23    
24    
25     #include <stdio.h>
26     #include <stdlib.h>
27     #include <string.h>
28     #include <assert.h>
29    
30     #include "functions.h"
31    
32    
33     /* Variables */
34    
35     extern char *tmpdirname;
36    
37    
38     /*++++++++++++++++++++++++++++++++++++++
39     A function to sort the contents of a file using a limited amount of RAM.
40    
41     The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort
42     and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting.
43     The individual sort steps and the merge step both use a "Heap sort"
44     http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well
45     if the data is already partially sorted.
46    
47     int fd_in The file descriptor of the input file (opened for reading and at the beginning).
48    
49     int fd_out The file descriptor of the output file (opened for writing and empty).
50    
51     size_t itemsize The size of each item in the file that needs sorting.
52    
53     size_t ramsize The maximum in-core buffer size to use when sorting.
54    
55     int (*compare)(const void*, const void*) The comparison function (identical to qsort if the
56     data to be sorted is an array of things not pointers).
57    
58     void (buildindex)(void *,index_t,index_t) If non-NULL then this function is called for each item as
59     it is written to the output file.
60     ++++++++++++++++++++++++++++++++++++++*/
61    
62     void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*),
63     void (*buildindex)(void*,index_t,index_t))
64     {
65     int *fds=NULL,*heap=NULL;
66     int nfiles=0,ndata=0;
67     index_t count=0,total=0;
68     size_t nitems=ramsize/(itemsize+sizeof(void*));
69     void *data,**datap;
70     char *filename;
71     int i,more=1;
72    
73     /* Allocate the RAM buffer and other bits */
74    
75     data=malloc(nitems*itemsize);
76     datap=malloc(nitems*sizeof(void*));
77    
78     filename=(char*)malloc(strlen(tmpdirname)+24);
79    
80     /* Loop around, fill the buffer, sort the data and write a temporary file */
81    
82     do
83     {
84     int fd,n=0;
85    
86     /* Read in the data and create pointers */
87    
88     for(i=0;i<nitems;i++)
89     {
90     datap[i]=data+i*itemsize;
91    
92     if(ReadFile(fd_in,datap[i],itemsize))
93     {
94     more=0;
95     break;
96     }
97    
98     total++;
99     }
100    
101     n=i;
102    
103     if(n==0)
104     break;
105    
106     /* Sort the data pointers using a heap sort */
107    
108     heapsort(datap,n,compare);
109    
110     /* Shortcut if all read in and sorted at once */
111    
112     if(nfiles==0 && n<nitems)
113     {
114     for(i=0;i<n;i++)
115     {
116     if(buildindex)
117     buildindex(datap[i],i,n);
118    
119     WriteFile(fd_out,datap[i],itemsize);
120     }
121    
122     return;
123     }
124    
125     /* Create a temporary file and write the result */
126    
127     sprintf(filename,"%s/filesort.%d.tmp",tmpdirname,nfiles);
128    
129     fd=OpenFile(filename);
130    
131     for(i=0;i<n;i++)
132     WriteFile(fd,datap[i],itemsize);
133    
134     CloseFile(fd);
135    
136     nfiles++;
137     }
138     while(more);
139    
140     /* Shortcut if only one file (unlucky for us there must have been exactly
141     nitems, lucky for us we still have the data in RAM) */
142    
143     if(nfiles==1)
144     {
145     for(i=0;i<nitems;i++)
146     {
147     if(buildindex)
148     buildindex(datap[i],i,nitems);
149    
150     WriteFile(fd_out,datap[i],itemsize);
151     }
152    
153     DeleteFile(filename);
154    
155     return;
156     }
157    
158     /* Check that number of files is less than file size */
159    
160     assert(nfiles<nitems);
161    
162     /* Open all of the temporary files */
163    
164     fds=(int*)malloc(nfiles*sizeof(int));
165    
166     for(i=0;i<nfiles;i++)
167     {
168     sprintf(filename,"%s/filesort.%d.tmp",tmpdirname,i);
169    
170     fds[i]=ReOpenFile(filename);
171    
172     DeleteFile(filename);
173     }
174    
175     /* Perform an n-way merge using a binary heap */
176    
177     heap=(int*)malloc(nfiles*sizeof(int));
178    
179     /* Fill the heap to start with */
180    
181     for(i=0;i<nfiles;i++)
182     {
183     int index;
184    
185     datap[i]=data+i*itemsize;
186    
187     ReadFile(fds[i],datap[i],itemsize);
188    
189     heap[i]=i;
190    
191     index=i;
192    
193     /* Bubble up the new value */
194    
195     while(index>0 &&
196     compare(datap[heap[index]],datap[heap[(index-1)/2]])<0)
197     {
198     int newindex;
199     int temp;
200    
201     newindex=(index-1)/2;
202    
203     temp=heap[index];
204     heap[index]=heap[newindex];
205     heap[newindex]=temp;
206    
207     index=newindex;
208     }
209     }
210    
211     /* Repeatedly pull out the root of the heap and refill from the same file */
212    
213     ndata=nfiles;
214    
215     do
216     {
217     int index=0;
218    
219     if(buildindex)
220     buildindex(datap[heap[0]],count++,total);
221    
222     WriteFile(fd_out,datap[heap[0]],itemsize);
223    
224     if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize))
225     {
226     ndata--;
227     heap[0]=heap[ndata];
228     }
229    
230     /* Bubble down the new value */
231    
232     while((2*index+2)<ndata &&
233     (compare(datap[heap[index]],datap[heap[2*index+1]])>0 ||
234     compare(datap[heap[index]],datap[heap[2*index+2]])>0))
235     {
236     int newindex;
237     int temp;
238    
239     if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0)
240     newindex=2*index+1;
241     else
242     newindex=2*index+2;
243    
244     temp=heap[newindex];
245     heap[newindex]=heap[index];
246     heap[index]=temp;
247    
248     index=newindex;
249     }
250    
251     if((2*index+2)==ndata &&
252     compare(datap[heap[index]],datap[heap[2*index+1]])>0)
253     {
254     int newindex;
255     int temp;
256    
257     newindex=2*index+1;
258    
259     temp=heap[newindex];
260     heap[newindex]=heap[index];
261     heap[index]=temp;
262     }
263     }
264     while(ndata>0);
265    
266     /* Tidy up */
267    
268     for(i=0;i<nfiles;i++)
269     CloseFile(fds[i]);
270    
271     free(heap);
272     free(data);
273     free(datap);
274     free(filename);
275     }
276    
277    
278     /*++++++++++++++++++++++++++++++++++++++
279     A function to sort an array of pointers efficiently.
280    
281     The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort,
282     in particular an this good because it can operate in-place and doesn't
283     allocate more memory like using qsort() does.
284    
285     void **datap A pointer to the array of pointers to sort.
286    
287     size_t nitems The number of items of data to sort.
288    
289     int(*compare)(const void *, const void *) The comparison function (identical to qsort if the
290     data to be sorted was an array of things not pointers).
291     ++++++++++++++++++++++++++++++++++++++*/
292    
293     void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*))
294     {
295     int i;
296    
297     /* Fill the heap by pretending to insert the data that is already there */
298    
299     for(i=1;i<nitems;i++)
300     {
301     int index=i;
302    
303     /* Bubble up the new value (upside-down, put largest at top) */
304    
305     while(index>0 &&
306     compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */
307     {
308     int newindex;
309     void *temp;
310    
311     newindex=(index-1)/2;
312    
313     temp=datap[index];
314     datap[index]=datap[newindex];
315     datap[newindex]=temp;
316    
317     index=newindex;
318     }
319     }
320    
321     /* Repeatedly pull out the root of the heap and swap with the bottom item */
322    
323     for(i=nitems-1;i>0;i--)
324     {
325     int index=0;
326     void *temp;
327    
328     temp=datap[index];
329     datap[index]=datap[i];
330     datap[i]=temp;
331    
332     /* Bubble down the new value (upside-down, put largest at top) */
333    
334     while((2*index+2)<i &&
335     (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */
336     compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */
337     {
338     int newindex;
339     void *temp;
340    
341     if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */
342     newindex=2*index+1;
343     else
344     newindex=2*index+2;
345    
346     temp=datap[newindex];
347     datap[newindex]=datap[index];
348     datap[index]=temp;
349    
350     index=newindex;
351     }
352    
353     if((2*index+2)==i &&
354     compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */
355     {
356     int newindex;
357     void *temp;
358    
359     newindex=2*index+1;
360    
361     temp=datap[newindex];
362     datap[newindex]=datap[index];
363     datap[index]=temp;
364     }
365     }
366     }

Properties

Name Value
cvs:description Functions to perform sorting.