Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Annotation of /trunk/src/sorting.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 284 - (hide annotations) (download) (as text)
Mon Oct 12 17:35:26 2009 UTC (15 years, 5 months ago) by amb
File MIME type: text/x-csrc
File size: 9091 byte(s)
Rename the tmpdirname variable.

1 amb 269 /***************************************
2 amb 284 $Header: /home/amb/CVS/routino/src/sorting.c,v 1.4 2009-10-12 17:35:26 amb Exp $
3 amb 269
4     Merge sort functions.
5    
6     Part of the Routino routing software.
7     ******************/ /******************
8     This file Copyright 2009 Andrew M. Bishop
9    
10     This program is free software: you can redistribute it and/or modify
11     it under the terms of the GNU Affero General Public License as published by
12     the Free Software Foundation, either version 3 of the License, or
13     (at your option) any later version.
14    
15     This program is distributed in the hope that it will be useful,
16     but WITHOUT ANY WARRANTY; without even the implied warranty of
17     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18     GNU Affero General Public License for more details.
19    
20     You should have received a copy of the GNU Affero General Public License
21     along with this program. If not, see <http://www.gnu.org/licenses/>.
22     ***************************************/
23    
24    
25     #include <stdio.h>
26     #include <stdlib.h>
27     #include <string.h>
28     #include <assert.h>
29    
30     #include "functions.h"
31    
32    
33     /* Variables */
34    
35 amb 284 extern char *option_tmpdirname;
36 amb 269
37    
38     /*++++++++++++++++++++++++++++++++++++++
39     A function to sort the contents of a file using a limited amount of RAM.
40    
41     The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort
42     and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting.
43     The individual sort steps and the merge step both use a "Heap sort"
44     http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well
45     if the data is already partially sorted.
46    
47     int fd_in The file descriptor of the input file (opened for reading and at the beginning).
48    
49     int fd_out The file descriptor of the output file (opened for writing and empty).
50    
51     size_t itemsize The size of each item in the file that needs sorting.
52    
53     size_t ramsize The maximum in-core buffer size to use when sorting.
54    
55     int (*compare)(const void*, const void*) The comparison function (identical to qsort if the
56     data to be sorted is an array of things not pointers).
57    
58 amb 274 int (*buildindex)(void *,index_t) If non-NULL then this function is called for each item, if it
59     returns 1 then it is written to the output file.
60 amb 269 ++++++++++++++++++++++++++++++++++++++*/
61    
62     void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*),
63 amb 274 int (*buildindex)(void*,index_t))
64 amb 269 {
65     int *fds=NULL,*heap=NULL;
66     int nfiles=0,ndata=0;
67     index_t count=0,total=0;
68     size_t nitems=ramsize/(itemsize+sizeof(void*));
69 amb 283 void *data=NULL,**datap=NULL;
70 amb 269 char *filename;
71     int i,more=1;
72    
73     /* Allocate the RAM buffer and other bits */
74    
75     data=malloc(nitems*itemsize);
76     datap=malloc(nitems*sizeof(void*));
77    
78 amb 284 filename=(char*)malloc(strlen(option_tmpdirname)+24);
79 amb 269
80     /* Loop around, fill the buffer, sort the data and write a temporary file */
81    
82     do
83     {
84     int fd,n=0;
85    
86     /* Read in the data and create pointers */
87    
88     for(i=0;i<nitems;i++)
89     {
90     datap[i]=data+i*itemsize;
91    
92     if(ReadFile(fd_in,datap[i],itemsize))
93     {
94     more=0;
95     break;
96     }
97    
98     total++;
99     }
100    
101     n=i;
102    
103     if(n==0)
104     break;
105    
106     /* Sort the data pointers using a heap sort */
107    
108     heapsort(datap,n,compare);
109    
110     /* Shortcut if all read in and sorted at once */
111    
112     if(nfiles==0 && n<nitems)
113     {
114     for(i=0;i<n;i++)
115     {
116 amb 274 if(!buildindex || buildindex(datap[i],count))
117     {
118     WriteFile(fd_out,datap[i],itemsize);
119     count++;
120     }
121 amb 269 }
122    
123 amb 283 goto tidy_and_exit;
124 amb 269 }
125    
126     /* Create a temporary file and write the result */
127    
128 amb 284 sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles);
129 amb 269
130     fd=OpenFile(filename);
131    
132     for(i=0;i<n;i++)
133     WriteFile(fd,datap[i],itemsize);
134    
135     CloseFile(fd);
136    
137     nfiles++;
138     }
139     while(more);
140    
141     /* Shortcut if only one file (unlucky for us there must have been exactly
142     nitems, lucky for us we still have the data in RAM) */
143    
144     if(nfiles==1)
145     {
146     for(i=0;i<nitems;i++)
147     {
148 amb 274 if(!buildindex || buildindex(datap[i],count))
149     {
150     WriteFile(fd_out,datap[i],itemsize);
151     count++;
152     }
153 amb 269 }
154    
155     DeleteFile(filename);
156    
157 amb 283 goto tidy_and_exit;
158 amb 269 }
159    
160     /* Check that number of files is less than file size */
161    
162     assert(nfiles<nitems);
163    
164     /* Open all of the temporary files */
165    
166     fds=(int*)malloc(nfiles*sizeof(int));
167    
168     for(i=0;i<nfiles;i++)
169     {
170 amb 284 sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i);
171 amb 269
172     fds[i]=ReOpenFile(filename);
173    
174     DeleteFile(filename);
175     }
176    
177     /* Perform an n-way merge using a binary heap */
178    
179     heap=(int*)malloc(nfiles*sizeof(int));
180    
181     /* Fill the heap to start with */
182    
183     for(i=0;i<nfiles;i++)
184     {
185     int index;
186    
187     datap[i]=data+i*itemsize;
188    
189     ReadFile(fds[i],datap[i],itemsize);
190    
191     heap[i]=i;
192    
193     index=i;
194    
195     /* Bubble up the new value */
196    
197     while(index>0 &&
198     compare(datap[heap[index]],datap[heap[(index-1)/2]])<0)
199     {
200     int newindex;
201     int temp;
202    
203     newindex=(index-1)/2;
204    
205     temp=heap[index];
206     heap[index]=heap[newindex];
207     heap[newindex]=temp;
208    
209     index=newindex;
210     }
211     }
212    
213     /* Repeatedly pull out the root of the heap and refill from the same file */
214    
215     ndata=nfiles;
216    
217     do
218     {
219     int index=0;
220    
221 amb 274 if(!buildindex || buildindex(datap[heap[0]],count))
222     {
223     WriteFile(fd_out,datap[heap[0]],itemsize);
224     count++;
225     }
226 amb 269
227     if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize))
228     {
229     ndata--;
230     heap[0]=heap[ndata];
231     }
232    
233     /* Bubble down the new value */
234    
235     while((2*index+2)<ndata &&
236     (compare(datap[heap[index]],datap[heap[2*index+1]])>0 ||
237     compare(datap[heap[index]],datap[heap[2*index+2]])>0))
238     {
239     int newindex;
240     int temp;
241    
242     if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0)
243     newindex=2*index+1;
244     else
245     newindex=2*index+2;
246    
247     temp=heap[newindex];
248     heap[newindex]=heap[index];
249     heap[index]=temp;
250    
251     index=newindex;
252     }
253    
254     if((2*index+2)==ndata &&
255     compare(datap[heap[index]],datap[heap[2*index+1]])>0)
256     {
257     int newindex;
258     int temp;
259    
260     newindex=2*index+1;
261    
262     temp=heap[newindex];
263     heap[newindex]=heap[index];
264     heap[index]=temp;
265     }
266     }
267     while(ndata>0);
268    
269     /* Tidy up */
270    
271 amb 283 tidy_and_exit:
272 amb 269
273 amb 283 if(fds)
274     {
275     for(i=0;i<nfiles;i++)
276     CloseFile(fds[i]);
277     free(fds);
278     }
279    
280     if(heap)
281     free(heap);
282    
283 amb 269 free(data);
284     free(datap);
285     free(filename);
286     }
287    
288    
289     /*++++++++++++++++++++++++++++++++++++++
290     A function to sort an array of pointers efficiently.
291    
292     The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort,
293     in particular an this good because it can operate in-place and doesn't
294     allocate more memory like using qsort() does.
295    
296     void **datap A pointer to the array of pointers to sort.
297    
298     size_t nitems The number of items of data to sort.
299    
300     int(*compare)(const void *, const void *) The comparison function (identical to qsort if the
301     data to be sorted was an array of things not pointers).
302     ++++++++++++++++++++++++++++++++++++++*/
303    
304     void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*))
305     {
306     int i;
307    
308     /* Fill the heap by pretending to insert the data that is already there */
309    
310     for(i=1;i<nitems;i++)
311     {
312     int index=i;
313    
314     /* Bubble up the new value (upside-down, put largest at top) */
315    
316     while(index>0 &&
317     compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */
318     {
319     int newindex;
320     void *temp;
321    
322     newindex=(index-1)/2;
323    
324     temp=datap[index];
325     datap[index]=datap[newindex];
326     datap[newindex]=temp;
327    
328     index=newindex;
329     }
330     }
331    
332     /* Repeatedly pull out the root of the heap and swap with the bottom item */
333    
334     for(i=nitems-1;i>0;i--)
335     {
336     int index=0;
337     void *temp;
338    
339     temp=datap[index];
340     datap[index]=datap[i];
341     datap[i]=temp;
342    
343     /* Bubble down the new value (upside-down, put largest at top) */
344    
345     while((2*index+2)<i &&
346     (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */
347     compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */
348     {
349     int newindex;
350     void *temp;
351    
352     if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */
353     newindex=2*index+1;
354     else
355     newindex=2*index+2;
356    
357     temp=datap[newindex];
358     datap[newindex]=datap[index];
359     datap[index]=temp;
360    
361     index=newindex;
362     }
363    
364     if((2*index+2)==i &&
365     compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */
366     {
367     int newindex;
368     void *temp;
369    
370     newindex=2*index+1;
371    
372     temp=datap[newindex];
373     datap[newindex]=datap[index];
374     datap[index]=temp;
375     }
376     }
377     }

Properties

Name Value
cvs:description Functions to perform sorting.