Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Annotation of /trunk/src/sorting.c
Parent Directory
|
Revision Log
Revision 284 -
(hide annotations)
(download)
(as text)
Mon Oct 12 17:35:26 2009 UTC (15 years, 5 months ago) by amb
File MIME type: text/x-csrc
File size: 9091 byte(s)
Mon Oct 12 17:35:26 2009 UTC (15 years, 5 months ago) by amb
File MIME type: text/x-csrc
File size: 9091 byte(s)
Rename the tmpdirname variable.
1 | amb | 269 | /*************************************** |
2 | amb | 284 | $Header: /home/amb/CVS/routino/src/sorting.c,v 1.4 2009-10-12 17:35:26 amb Exp $ |
3 | amb | 269 | |
4 | Merge sort functions. | ||
5 | |||
6 | Part of the Routino routing software. | ||
7 | ******************/ /****************** | ||
8 | This file Copyright 2009 Andrew M. Bishop | ||
9 | |||
10 | This program is free software: you can redistribute it and/or modify | ||
11 | it under the terms of the GNU Affero General Public License as published by | ||
12 | the Free Software Foundation, either version 3 of the License, or | ||
13 | (at your option) any later version. | ||
14 | |||
15 | This program is distributed in the hope that it will be useful, | ||
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | GNU Affero General Public License for more details. | ||
19 | |||
20 | You should have received a copy of the GNU Affero General Public License | ||
21 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
22 | ***************************************/ | ||
23 | |||
24 | |||
25 | #include <stdio.h> | ||
26 | #include <stdlib.h> | ||
27 | #include <string.h> | ||
28 | #include <assert.h> | ||
29 | |||
30 | #include "functions.h" | ||
31 | |||
32 | |||
33 | /* Variables */ | ||
34 | |||
35 | amb | 284 | extern char *option_tmpdirname; |
36 | amb | 269 | |
37 | |||
38 | /*++++++++++++++++++++++++++++++++++++++ | ||
39 | A function to sort the contents of a file using a limited amount of RAM. | ||
40 | |||
41 | The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort | ||
42 | and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting. | ||
43 | The individual sort steps and the merge step both use a "Heap sort" | ||
44 | http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well | ||
45 | if the data is already partially sorted. | ||
46 | |||
47 | int fd_in The file descriptor of the input file (opened for reading and at the beginning). | ||
48 | |||
49 | int fd_out The file descriptor of the output file (opened for writing and empty). | ||
50 | |||
51 | size_t itemsize The size of each item in the file that needs sorting. | ||
52 | |||
53 | size_t ramsize The maximum in-core buffer size to use when sorting. | ||
54 | |||
55 | int (*compare)(const void*, const void*) The comparison function (identical to qsort if the | ||
56 | data to be sorted is an array of things not pointers). | ||
57 | |||
58 | amb | 274 | int (*buildindex)(void *,index_t) If non-NULL then this function is called for each item, if it |
59 | returns 1 then it is written to the output file. | ||
60 | amb | 269 | ++++++++++++++++++++++++++++++++++++++*/ |
61 | |||
62 | void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*), | ||
63 | amb | 274 | int (*buildindex)(void*,index_t)) |
64 | amb | 269 | { |
65 | int *fds=NULL,*heap=NULL; | ||
66 | int nfiles=0,ndata=0; | ||
67 | index_t count=0,total=0; | ||
68 | size_t nitems=ramsize/(itemsize+sizeof(void*)); | ||
69 | amb | 283 | void *data=NULL,**datap=NULL; |
70 | amb | 269 | char *filename; |
71 | int i,more=1; | ||
72 | |||
73 | /* Allocate the RAM buffer and other bits */ | ||
74 | |||
75 | data=malloc(nitems*itemsize); | ||
76 | datap=malloc(nitems*sizeof(void*)); | ||
77 | |||
78 | amb | 284 | filename=(char*)malloc(strlen(option_tmpdirname)+24); |
79 | amb | 269 | |
80 | /* Loop around, fill the buffer, sort the data and write a temporary file */ | ||
81 | |||
82 | do | ||
83 | { | ||
84 | int fd,n=0; | ||
85 | |||
86 | /* Read in the data and create pointers */ | ||
87 | |||
88 | for(i=0;i<nitems;i++) | ||
89 | { | ||
90 | datap[i]=data+i*itemsize; | ||
91 | |||
92 | if(ReadFile(fd_in,datap[i],itemsize)) | ||
93 | { | ||
94 | more=0; | ||
95 | break; | ||
96 | } | ||
97 | |||
98 | total++; | ||
99 | } | ||
100 | |||
101 | n=i; | ||
102 | |||
103 | if(n==0) | ||
104 | break; | ||
105 | |||
106 | /* Sort the data pointers using a heap sort */ | ||
107 | |||
108 | heapsort(datap,n,compare); | ||
109 | |||
110 | /* Shortcut if all read in and sorted at once */ | ||
111 | |||
112 | if(nfiles==0 && n<nitems) | ||
113 | { | ||
114 | for(i=0;i<n;i++) | ||
115 | { | ||
116 | amb | 274 | if(!buildindex || buildindex(datap[i],count)) |
117 | { | ||
118 | WriteFile(fd_out,datap[i],itemsize); | ||
119 | count++; | ||
120 | } | ||
121 | amb | 269 | } |
122 | |||
123 | amb | 283 | goto tidy_and_exit; |
124 | amb | 269 | } |
125 | |||
126 | /* Create a temporary file and write the result */ | ||
127 | |||
128 | amb | 284 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,nfiles); |
129 | amb | 269 | |
130 | fd=OpenFile(filename); | ||
131 | |||
132 | for(i=0;i<n;i++) | ||
133 | WriteFile(fd,datap[i],itemsize); | ||
134 | |||
135 | CloseFile(fd); | ||
136 | |||
137 | nfiles++; | ||
138 | } | ||
139 | while(more); | ||
140 | |||
141 | /* Shortcut if only one file (unlucky for us there must have been exactly | ||
142 | nitems, lucky for us we still have the data in RAM) */ | ||
143 | |||
144 | if(nfiles==1) | ||
145 | { | ||
146 | for(i=0;i<nitems;i++) | ||
147 | { | ||
148 | amb | 274 | if(!buildindex || buildindex(datap[i],count)) |
149 | { | ||
150 | WriteFile(fd_out,datap[i],itemsize); | ||
151 | count++; | ||
152 | } | ||
153 | amb | 269 | } |
154 | |||
155 | DeleteFile(filename); | ||
156 | |||
157 | amb | 283 | goto tidy_and_exit; |
158 | amb | 269 | } |
159 | |||
160 | /* Check that number of files is less than file size */ | ||
161 | |||
162 | assert(nfiles<nitems); | ||
163 | |||
164 | /* Open all of the temporary files */ | ||
165 | |||
166 | fds=(int*)malloc(nfiles*sizeof(int)); | ||
167 | |||
168 | for(i=0;i<nfiles;i++) | ||
169 | { | ||
170 | amb | 284 | sprintf(filename,"%s/filesort.%d.tmp",option_tmpdirname,i); |
171 | amb | 269 | |
172 | fds[i]=ReOpenFile(filename); | ||
173 | |||
174 | DeleteFile(filename); | ||
175 | } | ||
176 | |||
177 | /* Perform an n-way merge using a binary heap */ | ||
178 | |||
179 | heap=(int*)malloc(nfiles*sizeof(int)); | ||
180 | |||
181 | /* Fill the heap to start with */ | ||
182 | |||
183 | for(i=0;i<nfiles;i++) | ||
184 | { | ||
185 | int index; | ||
186 | |||
187 | datap[i]=data+i*itemsize; | ||
188 | |||
189 | ReadFile(fds[i],datap[i],itemsize); | ||
190 | |||
191 | heap[i]=i; | ||
192 | |||
193 | index=i; | ||
194 | |||
195 | /* Bubble up the new value */ | ||
196 | |||
197 | while(index>0 && | ||
198 | compare(datap[heap[index]],datap[heap[(index-1)/2]])<0) | ||
199 | { | ||
200 | int newindex; | ||
201 | int temp; | ||
202 | |||
203 | newindex=(index-1)/2; | ||
204 | |||
205 | temp=heap[index]; | ||
206 | heap[index]=heap[newindex]; | ||
207 | heap[newindex]=temp; | ||
208 | |||
209 | index=newindex; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | /* Repeatedly pull out the root of the heap and refill from the same file */ | ||
214 | |||
215 | ndata=nfiles; | ||
216 | |||
217 | do | ||
218 | { | ||
219 | int index=0; | ||
220 | |||
221 | amb | 274 | if(!buildindex || buildindex(datap[heap[0]],count)) |
222 | { | ||
223 | WriteFile(fd_out,datap[heap[0]],itemsize); | ||
224 | count++; | ||
225 | } | ||
226 | amb | 269 | |
227 | if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize)) | ||
228 | { | ||
229 | ndata--; | ||
230 | heap[0]=heap[ndata]; | ||
231 | } | ||
232 | |||
233 | /* Bubble down the new value */ | ||
234 | |||
235 | while((2*index+2)<ndata && | ||
236 | (compare(datap[heap[index]],datap[heap[2*index+1]])>0 || | ||
237 | compare(datap[heap[index]],datap[heap[2*index+2]])>0)) | ||
238 | { | ||
239 | int newindex; | ||
240 | int temp; | ||
241 | |||
242 | if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0) | ||
243 | newindex=2*index+1; | ||
244 | else | ||
245 | newindex=2*index+2; | ||
246 | |||
247 | temp=heap[newindex]; | ||
248 | heap[newindex]=heap[index]; | ||
249 | heap[index]=temp; | ||
250 | |||
251 | index=newindex; | ||
252 | } | ||
253 | |||
254 | if((2*index+2)==ndata && | ||
255 | compare(datap[heap[index]],datap[heap[2*index+1]])>0) | ||
256 | { | ||
257 | int newindex; | ||
258 | int temp; | ||
259 | |||
260 | newindex=2*index+1; | ||
261 | |||
262 | temp=heap[newindex]; | ||
263 | heap[newindex]=heap[index]; | ||
264 | heap[index]=temp; | ||
265 | } | ||
266 | } | ||
267 | while(ndata>0); | ||
268 | |||
269 | /* Tidy up */ | ||
270 | |||
271 | amb | 283 | tidy_and_exit: |
272 | amb | 269 | |
273 | amb | 283 | if(fds) |
274 | { | ||
275 | for(i=0;i<nfiles;i++) | ||
276 | CloseFile(fds[i]); | ||
277 | free(fds); | ||
278 | } | ||
279 | |||
280 | if(heap) | ||
281 | free(heap); | ||
282 | |||
283 | amb | 269 | free(data); |
284 | free(datap); | ||
285 | free(filename); | ||
286 | } | ||
287 | |||
288 | |||
289 | /*++++++++++++++++++++++++++++++++++++++ | ||
290 | A function to sort an array of pointers efficiently. | ||
291 | |||
292 | The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort, | ||
293 | in particular an this good because it can operate in-place and doesn't | ||
294 | allocate more memory like using qsort() does. | ||
295 | |||
296 | void **datap A pointer to the array of pointers to sort. | ||
297 | |||
298 | size_t nitems The number of items of data to sort. | ||
299 | |||
300 | int(*compare)(const void *, const void *) The comparison function (identical to qsort if the | ||
301 | data to be sorted was an array of things not pointers). | ||
302 | ++++++++++++++++++++++++++++++++++++++*/ | ||
303 | |||
304 | void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*)) | ||
305 | { | ||
306 | int i; | ||
307 | |||
308 | /* Fill the heap by pretending to insert the data that is already there */ | ||
309 | |||
310 | for(i=1;i<nitems;i++) | ||
311 | { | ||
312 | int index=i; | ||
313 | |||
314 | /* Bubble up the new value (upside-down, put largest at top) */ | ||
315 | |||
316 | while(index>0 && | ||
317 | compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */ | ||
318 | { | ||
319 | int newindex; | ||
320 | void *temp; | ||
321 | |||
322 | newindex=(index-1)/2; | ||
323 | |||
324 | temp=datap[index]; | ||
325 | datap[index]=datap[newindex]; | ||
326 | datap[newindex]=temp; | ||
327 | |||
328 | index=newindex; | ||
329 | } | ||
330 | } | ||
331 | |||
332 | /* Repeatedly pull out the root of the heap and swap with the bottom item */ | ||
333 | |||
334 | for(i=nitems-1;i>0;i--) | ||
335 | { | ||
336 | int index=0; | ||
337 | void *temp; | ||
338 | |||
339 | temp=datap[index]; | ||
340 | datap[index]=datap[i]; | ||
341 | datap[i]=temp; | ||
342 | |||
343 | /* Bubble down the new value (upside-down, put largest at top) */ | ||
344 | |||
345 | while((2*index+2)<i && | ||
346 | (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */ | ||
347 | compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */ | ||
348 | { | ||
349 | int newindex; | ||
350 | void *temp; | ||
351 | |||
352 | if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */ | ||
353 | newindex=2*index+1; | ||
354 | else | ||
355 | newindex=2*index+2; | ||
356 | |||
357 | temp=datap[newindex]; | ||
358 | datap[newindex]=datap[index]; | ||
359 | datap[index]=temp; | ||
360 | |||
361 | index=newindex; | ||
362 | } | ||
363 | |||
364 | if((2*index+2)==i && | ||
365 | compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */ | ||
366 | { | ||
367 | int newindex; | ||
368 | void *temp; | ||
369 | |||
370 | newindex=2*index+1; | ||
371 | |||
372 | temp=datap[newindex]; | ||
373 | datap[newindex]=datap[index]; | ||
374 | datap[index]=temp; | ||
375 | } | ||
376 | } | ||
377 | } |
Properties
Name | Value |
---|---|
cvs:description | Functions to perform sorting. |