Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/sorting.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 289 - (show annotations) (download) (as text)
Thu Oct 22 18:17:51 2009 UTC (15 years, 4 months ago) by amb
File MIME type: text/x-csrc
File size: 9156 byte(s)
Added some missing comments and corrected some existing ones.

1 /***************************************
2 $Header: /home/amb/CVS/routino/src/sorting.c,v 1.5 2009-10-22 18:17:51 amb Exp $
3
4 Merge sort functions.
5
6 Part of the Routino routing software.
7 ******************/ /******************
8 This file Copyright 2009 Andrew M. Bishop
9
10 This program is free software: you can redistribute it and/or modify
11 it under the terms of the GNU Affero General Public License as published by
12 the Free Software Foundation, either version 3 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU Affero General Public License for more details.
19
20 You should have received a copy of the GNU Affero General Public License
21 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 ***************************************/
23
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29
30 #include "functions.h"
31
32
33 /* Variables */
34
35 /*+ The command line '--tmpdir' option or its default value. +*/
36 extern char *option_tmpdirname;
37
38
39 /*++++++++++++++++++++++++++++++++++++++
40 A function to sort the contents of a file using a limited amount of RAM.
41
42 The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort
43 and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting.
44 The individual sort steps and the merge step both use a "Heap sort"
45 http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well
46 if the data is already partially sorted.
47
48 int fd_in The file descriptor of the input file (opened for reading and at the beginning).
49
50 int fd_out The file descriptor of the output file (opened for writing and empty).
51
52 size_t itemsize The size of each item in the file that needs sorting.
53
54 size_t ramsize The maximum in-core buffer size to use when sorting.
55
56 int (*compare)(const void*, const void*) The comparison function (identical to qsort if the
57 data to be sorted is an array of things not pointers).
58
59 int (*buildindex)(void *,index_t) If non-NULL then this function is called for each item, if it
60 returns 1 then it is written to the output file.
61 ++++++++++++++++++++++++++++++++++++++*/
62
63 void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*),
64 int (*buildindex)(void*,index_t))
65 {
66 int *fds=NULL,*heap=NULL;
67 int nfiles=0,ndata=0;
68 index_t count=0,total=0;
69 size_t nitems=ramsize/(itemsize+sizeof(void*));
70 void *data=NULL,**datap=NULL;
71 char *filename;
72 int i,more=1;
73
74 /* Allocate the RAM buffer and other bits */
75
76 data=malloc(nitems*itemsize);
77 datap=malloc(nitems*sizeof(void*));
78
79 filename=(char*)malloc(strlen(option_tmpdirname)+24);
80
81 /* Loop around, fill the buffer, sort the data and write a temporary file */
82
83 do
84 {
85 int fd,n=0;
86
87 /* Read in the data and create pointers */
88
89 for(i=0;i<nitems;i++)
90 {
91 datap[i]=data+i*itemsize;
92
93 if(ReadFile(fd_in,datap[i],itemsize))
94 {
95 more=0;
96 break;
97 }
98
99 total++;
100 }
101
102 n=i;
103
104 if(n==0)
105 break;
106
107 /* Sort the data pointers using a heap sort */
108
109 heapsort(datap,n,compare);
110
111 /* Shortcut if all read in and sorted at once */
112
113 if(nfiles==0 && n<nitems)
114 {
115 for(i=0;i<n;i++)
116 {
117 if(!buildindex || buildindex(datap[i],count))
118 {
119 WriteFile(fd_out,datap[i],itemsize);
120 count++;
121 }
122 }
123
124 goto tidy_and_exit;
125 }
126
127 /* Create a temporary file and write the result */
128
129 sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles);
130
131 fd=OpenFile(filename);
132
133 for(i=0;i<n;i++)
134 WriteFile(fd,datap[i],itemsize);
135
136 CloseFile(fd);
137
138 nfiles++;
139 }
140 while(more);
141
142 /* Shortcut if only one file (unlucky for us there must have been exactly
143 nitems, lucky for us we still have the data in RAM) */
144
145 if(nfiles==1)
146 {
147 for(i=0;i<nitems;i++)
148 {
149 if(!buildindex || buildindex(datap[i],count))
150 {
151 WriteFile(fd_out,datap[i],itemsize);
152 count++;
153 }
154 }
155
156 DeleteFile(filename);
157
158 goto tidy_and_exit;
159 }
160
161 /* Check that number of files is less than file size */
162
163 assert(nfiles<nitems);
164
165 /* Open all of the temporary files */
166
167 fds=(int*)malloc(nfiles*sizeof(int));
168
169 for(i=0;i<nfiles;i++)
170 {
171 sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i);
172
173 fds[i]=ReOpenFile(filename);
174
175 DeleteFile(filename);
176 }
177
178 /* Perform an n-way merge using a binary heap */
179
180 heap=(int*)malloc(nfiles*sizeof(int));
181
182 /* Fill the heap to start with */
183
184 for(i=0;i<nfiles;i++)
185 {
186 int index;
187
188 datap[i]=data+i*itemsize;
189
190 ReadFile(fds[i],datap[i],itemsize);
191
192 heap[i]=i;
193
194 index=i;
195
196 /* Bubble up the new value */
197
198 while(index>0 &&
199 compare(datap[heap[index]],datap[heap[(index-1)/2]])<0)
200 {
201 int newindex;
202 int temp;
203
204 newindex=(index-1)/2;
205
206 temp=heap[index];
207 heap[index]=heap[newindex];
208 heap[newindex]=temp;
209
210 index=newindex;
211 }
212 }
213
214 /* Repeatedly pull out the root of the heap and refill from the same file */
215
216 ndata=nfiles;
217
218 do
219 {
220 int index=0;
221
222 if(!buildindex || buildindex(datap[heap[0]],count))
223 {
224 WriteFile(fd_out,datap[heap[0]],itemsize);
225 count++;
226 }
227
228 if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize))
229 {
230 ndata--;
231 heap[0]=heap[ndata];
232 }
233
234 /* Bubble down the new value */
235
236 while((2*index+2)<ndata &&
237 (compare(datap[heap[index]],datap[heap[2*index+1]])>0 ||
238 compare(datap[heap[index]],datap[heap[2*index+2]])>0))
239 {
240 int newindex;
241 int temp;
242
243 if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0)
244 newindex=2*index+1;
245 else
246 newindex=2*index+2;
247
248 temp=heap[newindex];
249 heap[newindex]=heap[index];
250 heap[index]=temp;
251
252 index=newindex;
253 }
254
255 if((2*index+2)==ndata &&
256 compare(datap[heap[index]],datap[heap[2*index+1]])>0)
257 {
258 int newindex;
259 int temp;
260
261 newindex=2*index+1;
262
263 temp=heap[newindex];
264 heap[newindex]=heap[index];
265 heap[index]=temp;
266 }
267 }
268 while(ndata>0);
269
270 /* Tidy up */
271
272 tidy_and_exit:
273
274 if(fds)
275 {
276 for(i=0;i<nfiles;i++)
277 CloseFile(fds[i]);
278 free(fds);
279 }
280
281 if(heap)
282 free(heap);
283
284 free(data);
285 free(datap);
286 free(filename);
287 }
288
289
290 /*++++++++++++++++++++++++++++++++++++++
291 A function to sort an array of pointers efficiently.
292
293 The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort,
294 in particular an this good because it can operate in-place and doesn't
295 allocate more memory like using qsort() does.
296
297 void **datap A pointer to the array of pointers to sort.
298
299 size_t nitems The number of items of data to sort.
300
301 int(*compare)(const void *, const void *) The comparison function (identical to qsort if the
302 data to be sorted was an array of things not pointers).
303 ++++++++++++++++++++++++++++++++++++++*/
304
305 void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*))
306 {
307 int i;
308
309 /* Fill the heap by pretending to insert the data that is already there */
310
311 for(i=1;i<nitems;i++)
312 {
313 int index=i;
314
315 /* Bubble up the new value (upside-down, put largest at top) */
316
317 while(index>0 &&
318 compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */
319 {
320 int newindex;
321 void *temp;
322
323 newindex=(index-1)/2;
324
325 temp=datap[index];
326 datap[index]=datap[newindex];
327 datap[newindex]=temp;
328
329 index=newindex;
330 }
331 }
332
333 /* Repeatedly pull out the root of the heap and swap with the bottom item */
334
335 for(i=nitems-1;i>0;i--)
336 {
337 int index=0;
338 void *temp;
339
340 temp=datap[index];
341 datap[index]=datap[i];
342 datap[i]=temp;
343
344 /* Bubble down the new value (upside-down, put largest at top) */
345
346 while((2*index+2)<i &&
347 (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */
348 compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */
349 {
350 int newindex;
351 void *temp;
352
353 if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */
354 newindex=2*index+1;
355 else
356 newindex=2*index+2;
357
358 temp=datap[newindex];
359 datap[newindex]=datap[index];
360 datap[index]=temp;
361
362 index=newindex;
363 }
364
365 if((2*index+2)==i &&
366 compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */
367 {
368 int newindex;
369 void *temp;
370
371 newindex=2*index+1;
372
373 temp=datap[newindex];
374 datap[newindex]=datap[index];
375 datap[index]=temp;
376 }
377 }
378 }

Properties

Name Value
cvs:description Functions to perform sorting.