Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Contents of /trunk/src/sorting.c
Parent Directory
|
Revision Log
Revision 289 -
(show annotations)
(download)
(as text)
Thu Oct 22 18:17:51 2009 UTC (15 years, 4 months ago) by amb
File MIME type: text/x-csrc
File size: 9156 byte(s)
Thu Oct 22 18:17:51 2009 UTC (15 years, 4 months ago) by amb
File MIME type: text/x-csrc
File size: 9156 byte(s)
Added some missing comments and corrected some existing ones.
1 | /*************************************** |
2 | $Header: /home/amb/CVS/routino/src/sorting.c,v 1.5 2009-10-22 18:17:51 amb Exp $ |
3 | |
4 | Merge sort functions. |
5 | |
6 | Part of the Routino routing software. |
7 | ******************/ /****************** |
8 | This file Copyright 2009 Andrew M. Bishop |
9 | |
10 | This program is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU Affero General Public License as published by |
12 | the Free Software Foundation, either version 3 of the License, or |
13 | (at your option) any later version. |
14 | |
15 | This program is distributed in the hope that it will be useful, |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 | GNU Affero General Public License for more details. |
19 | |
20 | You should have received a copy of the GNU Affero General Public License |
21 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
22 | ***************************************/ |
23 | |
24 | |
25 | #include <stdio.h> |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | #include <assert.h> |
29 | |
30 | #include "functions.h" |
31 | |
32 | |
33 | /* Variables */ |
34 | |
35 | /*+ The command line '--tmpdir' option or its default value. +*/ |
36 | extern char *option_tmpdirname; |
37 | |
38 | |
39 | /*++++++++++++++++++++++++++++++++++++++ |
40 | A function to sort the contents of a file using a limited amount of RAM. |
41 | |
42 | The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort |
43 | and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting. |
44 | The individual sort steps and the merge step both use a "Heap sort" |
45 | http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well |
46 | if the data is already partially sorted. |
47 | |
48 | int fd_in The file descriptor of the input file (opened for reading and at the beginning). |
49 | |
50 | int fd_out The file descriptor of the output file (opened for writing and empty). |
51 | |
52 | size_t itemsize The size of each item in the file that needs sorting. |
53 | |
54 | size_t ramsize The maximum in-core buffer size to use when sorting. |
55 | |
56 | int (*compare)(const void*, const void*) The comparison function (identical to qsort if the |
57 | data to be sorted is an array of things not pointers). |
58 | |
59 | int (*buildindex)(void *,index_t) If non-NULL then this function is called for each item, if it |
60 | returns 1 then it is written to the output file. |
61 | ++++++++++++++++++++++++++++++++++++++*/ |
62 | |
63 | void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*), |
64 | int (*buildindex)(void*,index_t)) |
65 | { |
66 | int *fds=NULL,*heap=NULL; |
67 | int nfiles=0,ndata=0; |
68 | index_t count=0,total=0; |
69 | size_t nitems=ramsize/(itemsize+sizeof(void*)); |
70 | void *data=NULL,**datap=NULL; |
71 | char *filename; |
72 | int i,more=1; |
73 | |
74 | /* Allocate the RAM buffer and other bits */ |
75 | |
76 | data=malloc(nitems*itemsize); |
77 | datap=malloc(nitems*sizeof(void*)); |
78 | |
79 | filename=(char*)malloc(strlen(option_tmpdirname)+24); |
80 | |
81 | /* Loop around, fill the buffer, sort the data and write a temporary file */ |
82 | |
83 | do |
84 | { |
85 | int fd,n=0; |
86 | |
87 | /* Read in the data and create pointers */ |
88 | |
89 | for(i=0;i<nitems;i++) |
90 | { |
91 | datap[i]=data+i*itemsize; |
92 | |
93 | if(ReadFile(fd_in,datap[i],itemsize)) |
94 | { |
95 | more=0; |
96 | break; |
97 | } |
98 | |
99 | total++; |
100 | } |
101 | |
102 | n=i; |
103 | |
104 | if(n==0) |
105 | break; |
106 | |
107 | /* Sort the data pointers using a heap sort */ |
108 | |
109 | heapsort(datap,n,compare); |
110 | |
111 | /* Shortcut if all read in and sorted at once */ |
112 | |
113 | if(nfiles==0 && n<nitems) |
114 | { |
115 | for(i=0;i<n;i++) |
116 | { |
117 | if(!buildindex || buildindex(datap[i],count)) |
118 | { |
119 | WriteFile(fd_out,datap[i],itemsize); |
120 | count++; |
121 | } |
122 | } |
123 | |
124 | goto tidy_and_exit; |
125 | } |
126 | |
127 | /* Create a temporary file and write the result */ |
128 | |
129 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles); |
130 | |
131 | fd=OpenFile(filename); |
132 | |
133 | for(i=0;i<n;i++) |
134 | WriteFile(fd,datap[i],itemsize); |
135 | |
136 | CloseFile(fd); |
137 | |
138 | nfiles++; |
139 | } |
140 | while(more); |
141 | |
142 | /* Shortcut if only one file (unlucky for us there must have been exactly |
143 | nitems, lucky for us we still have the data in RAM) */ |
144 | |
145 | if(nfiles==1) |
146 | { |
147 | for(i=0;i<nitems;i++) |
148 | { |
149 | if(!buildindex || buildindex(datap[i],count)) |
150 | { |
151 | WriteFile(fd_out,datap[i],itemsize); |
152 | count++; |
153 | } |
154 | } |
155 | |
156 | DeleteFile(filename); |
157 | |
158 | goto tidy_and_exit; |
159 | } |
160 | |
161 | /* Check that number of files is less than file size */ |
162 | |
163 | assert(nfiles<nitems); |
164 | |
165 | /* Open all of the temporary files */ |
166 | |
167 | fds=(int*)malloc(nfiles*sizeof(int)); |
168 | |
169 | for(i=0;i<nfiles;i++) |
170 | { |
171 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i); |
172 | |
173 | fds[i]=ReOpenFile(filename); |
174 | |
175 | DeleteFile(filename); |
176 | } |
177 | |
178 | /* Perform an n-way merge using a binary heap */ |
179 | |
180 | heap=(int*)malloc(nfiles*sizeof(int)); |
181 | |
182 | /* Fill the heap to start with */ |
183 | |
184 | for(i=0;i<nfiles;i++) |
185 | { |
186 | int index; |
187 | |
188 | datap[i]=data+i*itemsize; |
189 | |
190 | ReadFile(fds[i],datap[i],itemsize); |
191 | |
192 | heap[i]=i; |
193 | |
194 | index=i; |
195 | |
196 | /* Bubble up the new value */ |
197 | |
198 | while(index>0 && |
199 | compare(datap[heap[index]],datap[heap[(index-1)/2]])<0) |
200 | { |
201 | int newindex; |
202 | int temp; |
203 | |
204 | newindex=(index-1)/2; |
205 | |
206 | temp=heap[index]; |
207 | heap[index]=heap[newindex]; |
208 | heap[newindex]=temp; |
209 | |
210 | index=newindex; |
211 | } |
212 | } |
213 | |
214 | /* Repeatedly pull out the root of the heap and refill from the same file */ |
215 | |
216 | ndata=nfiles; |
217 | |
218 | do |
219 | { |
220 | int index=0; |
221 | |
222 | if(!buildindex || buildindex(datap[heap[0]],count)) |
223 | { |
224 | WriteFile(fd_out,datap[heap[0]],itemsize); |
225 | count++; |
226 | } |
227 | |
228 | if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize)) |
229 | { |
230 | ndata--; |
231 | heap[0]=heap[ndata]; |
232 | } |
233 | |
234 | /* Bubble down the new value */ |
235 | |
236 | while((2*index+2)<ndata && |
237 | (compare(datap[heap[index]],datap[heap[2*index+1]])>0 || |
238 | compare(datap[heap[index]],datap[heap[2*index+2]])>0)) |
239 | { |
240 | int newindex; |
241 | int temp; |
242 | |
243 | if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0) |
244 | newindex=2*index+1; |
245 | else |
246 | newindex=2*index+2; |
247 | |
248 | temp=heap[newindex]; |
249 | heap[newindex]=heap[index]; |
250 | heap[index]=temp; |
251 | |
252 | index=newindex; |
253 | } |
254 | |
255 | if((2*index+2)==ndata && |
256 | compare(datap[heap[index]],datap[heap[2*index+1]])>0) |
257 | { |
258 | int newindex; |
259 | int temp; |
260 | |
261 | newindex=2*index+1; |
262 | |
263 | temp=heap[newindex]; |
264 | heap[newindex]=heap[index]; |
265 | heap[index]=temp; |
266 | } |
267 | } |
268 | while(ndata>0); |
269 | |
270 | /* Tidy up */ |
271 | |
272 | tidy_and_exit: |
273 | |
274 | if(fds) |
275 | { |
276 | for(i=0;i<nfiles;i++) |
277 | CloseFile(fds[i]); |
278 | free(fds); |
279 | } |
280 | |
281 | if(heap) |
282 | free(heap); |
283 | |
284 | free(data); |
285 | free(datap); |
286 | free(filename); |
287 | } |
288 | |
289 | |
290 | /*++++++++++++++++++++++++++++++++++++++ |
291 | A function to sort an array of pointers efficiently. |
292 | |
293 | The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort, |
294 | in particular an this good because it can operate in-place and doesn't |
295 | allocate more memory like using qsort() does. |
296 | |
297 | void **datap A pointer to the array of pointers to sort. |
298 | |
299 | size_t nitems The number of items of data to sort. |
300 | |
301 | int(*compare)(const void *, const void *) The comparison function (identical to qsort if the |
302 | data to be sorted was an array of things not pointers). |
303 | ++++++++++++++++++++++++++++++++++++++*/ |
304 | |
305 | void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*)) |
306 | { |
307 | int i; |
308 | |
309 | /* Fill the heap by pretending to insert the data that is already there */ |
310 | |
311 | for(i=1;i<nitems;i++) |
312 | { |
313 | int index=i; |
314 | |
315 | /* Bubble up the new value (upside-down, put largest at top) */ |
316 | |
317 | while(index>0 && |
318 | compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */ |
319 | { |
320 | int newindex; |
321 | void *temp; |
322 | |
323 | newindex=(index-1)/2; |
324 | |
325 | temp=datap[index]; |
326 | datap[index]=datap[newindex]; |
327 | datap[newindex]=temp; |
328 | |
329 | index=newindex; |
330 | } |
331 | } |
332 | |
333 | /* Repeatedly pull out the root of the heap and swap with the bottom item */ |
334 | |
335 | for(i=nitems-1;i>0;i--) |
336 | { |
337 | int index=0; |
338 | void *temp; |
339 | |
340 | temp=datap[index]; |
341 | datap[index]=datap[i]; |
342 | datap[i]=temp; |
343 | |
344 | /* Bubble down the new value (upside-down, put largest at top) */ |
345 | |
346 | while((2*index+2)<i && |
347 | (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */ |
348 | compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */ |
349 | { |
350 | int newindex; |
351 | void *temp; |
352 | |
353 | if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */ |
354 | newindex=2*index+1; |
355 | else |
356 | newindex=2*index+2; |
357 | |
358 | temp=datap[newindex]; |
359 | datap[newindex]=datap[index]; |
360 | datap[index]=temp; |
361 | |
362 | index=newindex; |
363 | } |
364 | |
365 | if((2*index+2)==i && |
366 | compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */ |
367 | { |
368 | int newindex; |
369 | void *temp; |
370 | |
371 | newindex=2*index+1; |
372 | |
373 | temp=datap[newindex]; |
374 | datap[newindex]=datap[index]; |
375 | datap[index]=temp; |
376 | } |
377 | } |
378 | } |
Properties
Name | Value |
---|---|
cvs:description | Functions to perform sorting. |