Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino
Annotation of /trunk/src/sorting.c
Parent Directory
|
Revision Log
Revision 269 -
(hide annotations)
(download)
(as text)
Sun Oct 4 10:44:51 2009 UTC (15 years, 6 months ago) by amb
File MIME type: text/x-csrc
File size: 8865 byte(s)
Sun Oct 4 10:44:51 2009 UTC (15 years, 6 months ago) by amb
File MIME type: text/x-csrc
File size: 8865 byte(s)
Initial revision
1 | amb | 269 | /*************************************** |
2 | $Header: /home/amb/CVS/routino/src/sorting.c,v 1.1 2009-10-04 10:44:51 amb Exp $ | ||
3 | |||
4 | Merge sort functions. | ||
5 | |||
6 | Part of the Routino routing software. | ||
7 | ******************/ /****************** | ||
8 | This file Copyright 2009 Andrew M. Bishop | ||
9 | |||
10 | This program is free software: you can redistribute it and/or modify | ||
11 | it under the terms of the GNU Affero General Public License as published by | ||
12 | the Free Software Foundation, either version 3 of the License, or | ||
13 | (at your option) any later version. | ||
14 | |||
15 | This program is distributed in the hope that it will be useful, | ||
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | GNU Affero General Public License for more details. | ||
19 | |||
20 | You should have received a copy of the GNU Affero General Public License | ||
21 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
22 | ***************************************/ | ||
23 | |||
24 | |||
25 | #include <stdio.h> | ||
26 | #include <stdlib.h> | ||
27 | #include <string.h> | ||
28 | #include <assert.h> | ||
29 | |||
30 | #include "functions.h" | ||
31 | |||
32 | |||
33 | /* Variables */ | ||
34 | |||
35 | extern char *tmpdirname; | ||
36 | |||
37 | |||
38 | /*++++++++++++++++++++++++++++++++++++++ | ||
39 | A function to sort the contents of a file using a limited amount of RAM. | ||
40 | |||
41 | The data is sorted using a "Merge sort" http://en.wikipedia.org/wiki/Merge_sort | ||
42 | and in particular an "external sort" http://en.wikipedia.org/wiki/External_sorting. | ||
43 | The individual sort steps and the merge step both use a "Heap sort" | ||
44 | http://en.wikipedia.org/wiki/Heapsort. The combination of the two should work well | ||
45 | if the data is already partially sorted. | ||
46 | |||
47 | int fd_in The file descriptor of the input file (opened for reading and at the beginning). | ||
48 | |||
49 | int fd_out The file descriptor of the output file (opened for writing and empty). | ||
50 | |||
51 | size_t itemsize The size of each item in the file that needs sorting. | ||
52 | |||
53 | size_t ramsize The maximum in-core buffer size to use when sorting. | ||
54 | |||
55 | int (*compare)(const void*, const void*) The comparison function (identical to qsort if the | ||
56 | data to be sorted is an array of things not pointers). | ||
57 | |||
58 | void (buildindex)(void *,index_t,index_t) If non-NULL then this function is called for each item as | ||
59 | it is written to the output file. | ||
60 | ++++++++++++++++++++++++++++++++++++++*/ | ||
61 | |||
62 | void filesort(int fd_in,int fd_out,size_t itemsize,size_t ramsize,int (*compare)(const void*,const void*), | ||
63 | void (*buildindex)(void*,index_t,index_t)) | ||
64 | { | ||
65 | int *fds=NULL,*heap=NULL; | ||
66 | int nfiles=0,ndata=0; | ||
67 | index_t count=0,total=0; | ||
68 | size_t nitems=ramsize/(itemsize+sizeof(void*)); | ||
69 | void *data,**datap; | ||
70 | char *filename; | ||
71 | int i,more=1; | ||
72 | |||
73 | /* Allocate the RAM buffer and other bits */ | ||
74 | |||
75 | data=malloc(nitems*itemsize); | ||
76 | datap=malloc(nitems*sizeof(void*)); | ||
77 | |||
78 | filename=(char*)malloc(strlen(tmpdirname)+24); | ||
79 | |||
80 | /* Loop around, fill the buffer, sort the data and write a temporary file */ | ||
81 | |||
82 | do | ||
83 | { | ||
84 | int fd,n=0; | ||
85 | |||
86 | /* Read in the data and create pointers */ | ||
87 | |||
88 | for(i=0;i<nitems;i++) | ||
89 | { | ||
90 | datap[i]=data+i*itemsize; | ||
91 | |||
92 | if(ReadFile(fd_in,datap[i],itemsize)) | ||
93 | { | ||
94 | more=0; | ||
95 | break; | ||
96 | } | ||
97 | |||
98 | total++; | ||
99 | } | ||
100 | |||
101 | n=i; | ||
102 | |||
103 | if(n==0) | ||
104 | break; | ||
105 | |||
106 | /* Sort the data pointers using a heap sort */ | ||
107 | |||
108 | heapsort(datap,n,compare); | ||
109 | |||
110 | /* Shortcut if all read in and sorted at once */ | ||
111 | |||
112 | if(nfiles==0 && n<nitems) | ||
113 | { | ||
114 | for(i=0;i<n;i++) | ||
115 | { | ||
116 | if(buildindex) | ||
117 | buildindex(datap[i],i,n); | ||
118 | |||
119 | WriteFile(fd_out,datap[i],itemsize); | ||
120 | } | ||
121 | |||
122 | return; | ||
123 | } | ||
124 | |||
125 | /* Create a temporary file and write the result */ | ||
126 | |||
127 | sprintf(filename,"%s/filesort.%d.tmp",tmpdirname,nfiles); | ||
128 | |||
129 | fd=OpenFile(filename); | ||
130 | |||
131 | for(i=0;i<n;i++) | ||
132 | WriteFile(fd,datap[i],itemsize); | ||
133 | |||
134 | CloseFile(fd); | ||
135 | |||
136 | nfiles++; | ||
137 | } | ||
138 | while(more); | ||
139 | |||
140 | /* Shortcut if only one file (unlucky for us there must have been exactly | ||
141 | nitems, lucky for us we still have the data in RAM) */ | ||
142 | |||
143 | if(nfiles==1) | ||
144 | { | ||
145 | for(i=0;i<nitems;i++) | ||
146 | { | ||
147 | if(buildindex) | ||
148 | buildindex(datap[i],i,nitems); | ||
149 | |||
150 | WriteFile(fd_out,datap[i],itemsize); | ||
151 | } | ||
152 | |||
153 | DeleteFile(filename); | ||
154 | |||
155 | return; | ||
156 | } | ||
157 | |||
158 | /* Check that number of files is less than file size */ | ||
159 | |||
160 | assert(nfiles<nitems); | ||
161 | |||
162 | /* Open all of the temporary files */ | ||
163 | |||
164 | fds=(int*)malloc(nfiles*sizeof(int)); | ||
165 | |||
166 | for(i=0;i<nfiles;i++) | ||
167 | { | ||
168 | sprintf(filename,"%s/filesort.%d.tmp",tmpdirname,i); | ||
169 | |||
170 | fds[i]=ReOpenFile(filename); | ||
171 | |||
172 | DeleteFile(filename); | ||
173 | } | ||
174 | |||
175 | /* Perform an n-way merge using a binary heap */ | ||
176 | |||
177 | heap=(int*)malloc(nfiles*sizeof(int)); | ||
178 | |||
179 | /* Fill the heap to start with */ | ||
180 | |||
181 | for(i=0;i<nfiles;i++) | ||
182 | { | ||
183 | int index; | ||
184 | |||
185 | datap[i]=data+i*itemsize; | ||
186 | |||
187 | ReadFile(fds[i],datap[i],itemsize); | ||
188 | |||
189 | heap[i]=i; | ||
190 | |||
191 | index=i; | ||
192 | |||
193 | /* Bubble up the new value */ | ||
194 | |||
195 | while(index>0 && | ||
196 | compare(datap[heap[index]],datap[heap[(index-1)/2]])<0) | ||
197 | { | ||
198 | int newindex; | ||
199 | int temp; | ||
200 | |||
201 | newindex=(index-1)/2; | ||
202 | |||
203 | temp=heap[index]; | ||
204 | heap[index]=heap[newindex]; | ||
205 | heap[newindex]=temp; | ||
206 | |||
207 | index=newindex; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* Repeatedly pull out the root of the heap and refill from the same file */ | ||
212 | |||
213 | ndata=nfiles; | ||
214 | |||
215 | do | ||
216 | { | ||
217 | int index=0; | ||
218 | |||
219 | if(buildindex) | ||
220 | buildindex(datap[heap[0]],count++,total); | ||
221 | |||
222 | WriteFile(fd_out,datap[heap[0]],itemsize); | ||
223 | |||
224 | if(ReadFile(fds[heap[0]],datap[heap[0]],itemsize)) | ||
225 | { | ||
226 | ndata--; | ||
227 | heap[0]=heap[ndata]; | ||
228 | } | ||
229 | |||
230 | /* Bubble down the new value */ | ||
231 | |||
232 | while((2*index+2)<ndata && | ||
233 | (compare(datap[heap[index]],datap[heap[2*index+1]])>0 || | ||
234 | compare(datap[heap[index]],datap[heap[2*index+2]])>0)) | ||
235 | { | ||
236 | int newindex; | ||
237 | int temp; | ||
238 | |||
239 | if(compare(datap[heap[2*index+1]],datap[heap[2*index+2]])<0) | ||
240 | newindex=2*index+1; | ||
241 | else | ||
242 | newindex=2*index+2; | ||
243 | |||
244 | temp=heap[newindex]; | ||
245 | heap[newindex]=heap[index]; | ||
246 | heap[index]=temp; | ||
247 | |||
248 | index=newindex; | ||
249 | } | ||
250 | |||
251 | if((2*index+2)==ndata && | ||
252 | compare(datap[heap[index]],datap[heap[2*index+1]])>0) | ||
253 | { | ||
254 | int newindex; | ||
255 | int temp; | ||
256 | |||
257 | newindex=2*index+1; | ||
258 | |||
259 | temp=heap[newindex]; | ||
260 | heap[newindex]=heap[index]; | ||
261 | heap[index]=temp; | ||
262 | } | ||
263 | } | ||
264 | while(ndata>0); | ||
265 | |||
266 | /* Tidy up */ | ||
267 | |||
268 | for(i=0;i<nfiles;i++) | ||
269 | CloseFile(fds[i]); | ||
270 | |||
271 | free(heap); | ||
272 | free(data); | ||
273 | free(datap); | ||
274 | free(filename); | ||
275 | } | ||
276 | |||
277 | |||
278 | /*++++++++++++++++++++++++++++++++++++++ | ||
279 | A function to sort an array of pointers efficiently. | ||
280 | |||
281 | The data is sorted using a "Heap sort" http://en.wikipedia.org/wiki/Heapsort, | ||
282 | in particular an this good because it can operate in-place and doesn't | ||
283 | allocate more memory like using qsort() does. | ||
284 | |||
285 | void **datap A pointer to the array of pointers to sort. | ||
286 | |||
287 | size_t nitems The number of items of data to sort. | ||
288 | |||
289 | int(*compare)(const void *, const void *) The comparison function (identical to qsort if the | ||
290 | data to be sorted was an array of things not pointers). | ||
291 | ++++++++++++++++++++++++++++++++++++++*/ | ||
292 | |||
293 | void heapsort(void **datap,size_t nitems,int(*compare)(const void*, const void*)) | ||
294 | { | ||
295 | int i; | ||
296 | |||
297 | /* Fill the heap by pretending to insert the data that is already there */ | ||
298 | |||
299 | for(i=1;i<nitems;i++) | ||
300 | { | ||
301 | int index=i; | ||
302 | |||
303 | /* Bubble up the new value (upside-down, put largest at top) */ | ||
304 | |||
305 | while(index>0 && | ||
306 | compare(datap[index],datap[(index-1)/2])>0) /* reversed compared to filesort() above */ | ||
307 | { | ||
308 | int newindex; | ||
309 | void *temp; | ||
310 | |||
311 | newindex=(index-1)/2; | ||
312 | |||
313 | temp=datap[index]; | ||
314 | datap[index]=datap[newindex]; | ||
315 | datap[newindex]=temp; | ||
316 | |||
317 | index=newindex; | ||
318 | } | ||
319 | } | ||
320 | |||
321 | /* Repeatedly pull out the root of the heap and swap with the bottom item */ | ||
322 | |||
323 | for(i=nitems-1;i>0;i--) | ||
324 | { | ||
325 | int index=0; | ||
326 | void *temp; | ||
327 | |||
328 | temp=datap[index]; | ||
329 | datap[index]=datap[i]; | ||
330 | datap[i]=temp; | ||
331 | |||
332 | /* Bubble down the new value (upside-down, put largest at top) */ | ||
333 | |||
334 | while((2*index+2)<i && | ||
335 | (compare(datap[index],datap[2*index+1])<0 || /* reversed compared to filesort() above */ | ||
336 | compare(datap[index],datap[2*index+2])<0)) /* reversed compared to filesort() above */ | ||
337 | { | ||
338 | int newindex; | ||
339 | void *temp; | ||
340 | |||
341 | if(compare(datap[2*index+1],datap[2*index+2])>0) /* reversed compared to filesort() above */ | ||
342 | newindex=2*index+1; | ||
343 | else | ||
344 | newindex=2*index+2; | ||
345 | |||
346 | temp=datap[newindex]; | ||
347 | datap[newindex]=datap[index]; | ||
348 | datap[index]=temp; | ||
349 | |||
350 | index=newindex; | ||
351 | } | ||
352 | |||
353 | if((2*index+2)==i && | ||
354 | compare(datap[index],datap[2*index+1])<0) /* reversed compared to filesort() above */ | ||
355 | { | ||
356 | int newindex; | ||
357 | void *temp; | ||
358 | |||
359 | newindex=2*index+1; | ||
360 | |||
361 | temp=datap[newindex]; | ||
362 | datap[newindex]=datap[index]; | ||
363 | datap[index]=temp; | ||
364 | } | ||
365 | } | ||
366 | } |
Properties
Name | Value |
---|---|
cvs:description | Functions to perform sorting. |