Routino SVN Repository Browser

Check out the latest version of Routino: svn co http://routino.org/svn/trunk routino

ViewVC logotype

Contents of /trunk/src/osmpbfparse.c

Parent Directory | Revision Log


Revision 1221 - (show annotations) (download) (as text)
Fri Dec 21 16:08:44 2012 UTC (12 years, 2 months ago) by amb
File MIME type: text/x-csrc
File size: 36525 byte(s)
Add a parser for OSM PBF format.
Separate the XML parser from the data processing in osmparser.c.
Update planetsplitter and documentation to use new format.

1 /***************************************
2 A simple osm-specific PBF parser where the structure is hard-coded.
3
4 Part of the Routino routing software.
5 ******************/ /******************
6 This file Copyright 2012 Andrew M. Bishop
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU Affero General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU Affero General Public License for more details.
17
18 You should have received a copy of the GNU Affero General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 ***************************************/
21
22
23 #include <stdio.h>
24 #include <unistd.h>
25 #include <stdlib.h>
26 #include <inttypes.h>
27 #include <stdint.h>
28 #include <string.h>
29
30 #if defined(USE_GZIP) && USE_GZIP
31 #include <zlib.h>
32 #endif
33
34 #include "types.h"
35
36 #include "osmparser.h"
37 #include "tagging.h"
38 #include "logging.h"
39
40
/*
 * Protocol-buffer field numbers used by the OSM PBF messages.  Each value is
 * compared against PBF_FIELD() of a decoded key to decide how the field that
 * follows is handled.
 */

/* Fields of a BlobHeader message */

#define PBF_VAL_BLOBHEADER_TYPE         1
#define PBF_VAL_BLOBHEADER_SIZE         3

/* Fields of a Blob message */

#define PBF_VAL_BLOB_RAW_DATA           1
#define PBF_VAL_BLOB_RAW_SIZE           2
#define PBF_VAL_BLOB_ZLIB_DATA          3

/* Fields of a HeaderBlock message */

#define PBF_VAL_REQUIRED_FEATURES       4
#define PBF_VAL_OPTIONAL_FEATURES       5

/* Fields of a PrimitiveBlock message */

#define PBF_VAL_STRING_TABLE            1
#define PBF_VAL_PRIMITIVE_GROUP         2
#define PBF_VAL_GRANULARITY            17
#define PBF_VAL_LAT_OFFSET             19
#define PBF_VAL_LON_OFFSET             20

/* Fields of a PrimitiveGroup message */

#define PBF_VAL_NODES                   1
#define PBF_VAL_DENSE_NODES             2
#define PBF_VAL_WAYS                    3
#define PBF_VAL_RELATIONS               4

/* Fields of a StringTable message */

#define PBF_VAL_STRING                  1

/* Fields of an Info or DenseInfo message */

#define PBF_VAL_VISIBLE                 6

/* Fields of a Node message */

#define PBF_VAL_NODE_ID                 1
#define PBF_VAL_NODE_KEYS               2
#define PBF_VAL_NODE_VALS               3
#define PBF_VAL_NODE_INFO               4
#define PBF_VAL_NODE_LAT                8
#define PBF_VAL_NODE_LON                9

/* Fields of a DenseNode message */

#define PBF_VAL_DENSE_NODE_ID           1
#define PBF_VAL_DENSE_NODE_INFO         5
#define PBF_VAL_DENSE_NODE_LAT          8
#define PBF_VAL_DENSE_NODE_LON          9
#define PBF_VAL_DENSE_NODE_KEYS_VALS   10

/* Fields of a Way message */

#define PBF_VAL_WAY_ID                  1
#define PBF_VAL_WAY_KEYS                2
#define PBF_VAL_WAY_VALS                3
#define PBF_VAL_WAY_INFO                4
#define PBF_VAL_WAY_REFS                8

/* Fields of a Relation message */

#define PBF_VAL_RELATION_ID             1
#define PBF_VAL_RELATION_KEYS           2
#define PBF_VAL_RELATION_VALS           3
#define PBF_VAL_RELATION_INFO           4
#define PBF_VAL_RELATION_ROLES          8
#define PBF_VAL_RELATION_MEMIDS         9
#define PBF_VAL_RELATION_TYPES         10
114
/* Parser states: the value that ends parsing and is returned by ParsePBF() */

/* Normal end of input (not an error) */

#define PBF_EOF                         0

/* Error states, each one has its own message printed by ParsePBF() */

#define PBF_ERROR_UNEXP_EOF           100
#define PBF_ERROR_BLOB_HEADER_LEN     101
#define PBF_ERROR_BLOB_LEN            102
#define PBF_ERROR_NOT_OSM             103
#define PBF_ERROR_BLOB_BOTH           104
#define PBF_ERROR_BLOB_NEITHER        105
#define PBF_ERROR_NO_GZIP             106
#define PBF_ERROR_GZIP_INIT           107
#define PBF_ERROR_GZIP_INFLATE        108
#define PBF_ERROR_GZIP_WRONG_LEN      109
#define PBF_ERROR_GZIP_END            110
#define PBF_ERROR_UNSUPPORTED         111
#define PBF_ERROR_TOO_MANY_GROUPS     112
133
134 /* Parsing variables and functions */
135
136 static index_t nnodes=0,nways=0,nrelations=0;
137
138 static unsigned long long byteno;
139
140 static int buffer_allocated,zbuffer_allocated;
141 static unsigned char *buffer=NULL,*zbuffer=NULL;
142 static unsigned char *buffer_ptr,*buffer_end;
143
144 static int string_table_length=0,string_table_allocated=0;
145 static unsigned char **string_table=NULL;
146 static uint32_t *string_table_string_lengths=NULL;
147
148 static int32_t granularity=100;
149 static int64_t lat_offset=0,lon_offset=0;
150
151 static unsigned char *visibles=NULL;
152
153 static int mode_visible=MODE_NORMAL;
154
155 #define LENGTH_32M (32*1024*1024)
156
157
158 /*++++++++++++++++++++++++++++++++++++++
159 Refill the data buffer and set the pointers.
160
161 int buffer_refill Return 0 if everything is OK or 1 for EOF.
162
163 int fd The file descriptor to read from.
164
165 uint32_t bytes The number of bytes to read.
166 ++++++++++++++++++++++++++++++++++++++*/
167
168 static inline int buffer_refill(int fd,uint32_t bytes)
169 {
170 ssize_t n;
171
172 if(bytes>buffer_allocated)
173 buffer=(unsigned char *)realloc(buffer,buffer_allocated=bytes);
174
175 buffer_end=buffer;
176
177 do
178 {
179 n=read(fd,buffer_end,bytes);
180
181 if(n<=0)
182 return(1);
183
184 buffer_end+=n;
185 bytes-=n;
186 }
187 while(bytes>0);
188
189 buffer_ptr=buffer;
190
191 byteno+=bytes;
192
193 return(0);
194 }
195
/* Functions defined later in this file */

#if defined(USE_GZIP) && USE_GZIP
static int uncompress_pbf(unsigned char *data,uint32_t compressed,uint32_t uncompressed);
#endif /* USE_GZIP */

static void process_string_table(unsigned char *data,uint32_t length);
static void process_primitive_group(unsigned char *data,uint32_t length);

static void process_nodes(unsigned char *data,uint32_t length);
static void process_dense_nodes(unsigned char *data,uint32_t length);
static void process_ways(unsigned char *data,uint32_t length);
static void process_relations(unsigned char *data,uint32_t length);

static void process_info(unsigned char *data,uint32_t length);
static void process_dense_info(unsigned char *data,uint32_t length);


/* Macros to simplify the parser (and make it look more like the XML parser) */

/* Set the parser state and leave the parsing loop (needs 'state' and the 'finish_parsing' label in scope) */

#define BEGIN(xx)            do{ state=(xx); goto finish_parsing; } while(0)

/* Read (xx) bytes from 'fd'; running out of data here is a normal end of file */

#define BUFFER_CHARS_EOF(xx) do{ if(buffer_refill(fd,(xx))) BEGIN(PBF_EOF); } while(0)

/* Read (xx) bytes from 'fd'; running out of data here is an error */

#define BUFFER_CHARS(xx)     do{ if(buffer_refill(fd,(xx))) BEGIN(PBF_ERROR_UNEXP_EOF); } while(0)


/* PBF decoding */

/* Split a decoded key into its field number (upper bits) and wire type (lowest 3 bits) */

#define PBF_FIELD(xx)     (int)(((xx)&0xFFF8)>>3)
#define PBF_TYPE(xx)      (int)((xx)&0x0007)

/* Convert a stored coordinate to degrees using the block's granularity and offset (1E-9 degree units) */

#define PBF_LATITUDE(xx)  (double)(1E-9*(granularity*(xx)+lat_offset))
#define PBF_LONGITUDE(xx) (double)(1E-9*(granularity*(xx)+lon_offset))
226
227
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF int32 data value (a varint of which the low 32 bits are kept).

  uint32_t pbf_int32 Returns the integer value.

  unsigned char **ptr The pointer to read the data from, updated to point to
                      the byte following the value.
  ++++++++++++++++++++++++++++++++++++++*/

static inline uint32_t pbf_int32(unsigned char **ptr)
{
    unsigned char *p=*ptr;
    uint32_t value=0;
    int shift,extra;

    /* Up to five bytes contribute 7 bits each; shifting as unsigned avoids
       overflowing a signed int when the fifth byte is shifted by 28 bits. */

    for(shift=0;shift<=28;shift+=7)
    {
        unsigned char byte=*p++;

        value+=(uint32_t)(byte&0x7F)<<shift;

        if(!(byte&0x80))
        {
            *ptr=p;
            return(value);
        }
    }

    /* The value continues past five bytes (a negative int32 is written as a
       ten byte varint); skip the rest, up to the ten byte maximum, so that the
       pointer is left at the start of the next field. */

    for(extra=0;extra<5;extra++)
        if(!((*p++)&0x80))
            break;

    *ptr=p;

    return(value);
}
249
250
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF int64 data value (a varint of at most ten bytes).

  int64_t pbf_int64 Returns the integer value.

  unsigned char **ptr The pointer to read the data from, updated to point to
                      the byte following the value.
  ++++++++++++++++++++++++++++++++++++++*/

static inline int64_t pbf_int64(unsigned char **ptr)
{
    unsigned char *p=*ptr;
    uint64_t value=0;
    int shift;

    /* Each byte supplies 7 bits, least significant group first; a clear top
       bit marks the last byte and the tenth byte is always the last. */

    for(shift=0;shift<=63;shift+=7)
    {
        unsigned char byte=*p++;

        value+=(uint64_t)(byte&0x7F)<<shift;

        if(!(byte&0x80))
            break;
    }

    *ptr=p;

    return(value);
}
277
278
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF sint64 data value (a varint holding a zig-zag encoded integer).

  int64_t pbf_sint64 Returns the integer value.

  unsigned char **ptr The pointer to read the data from, updated to point to
                      the byte following the value.
  ++++++++++++++++++++++++++++++++++++++*/

static inline int64_t pbf_sint64(unsigned char **ptr)
{
    unsigned char *p=*ptr;
    uint64_t raw=0,magnitude;
    int shift;

    /* Collect the varint in unsigned arithmetic so that stray high bits in a
       malformed tenth byte are discarded instead of overflowing a signed shift. */

    for(shift=0;shift<=63;shift+=7)
    {
        unsigned char byte=*p++;

        raw+=(uint64_t)(byte&0x7F)<<shift;

        if(!(byte&0x80))
            break;
    }

    *ptr=p;

    /* Zig-zag: the lowest bit is the sign, the remaining bits are the magnitude
       (0 => 0, 1 => -1, 2 => 1, 3 => -2, ...). */

    magnitude=raw>>1;

    if(raw&1)
        return(-(int64_t)magnitude-1);

    return((int64_t)magnitude);
}
309
310
/*++++++++++++++++++++++++++++++++++++++
  Parse a PBF length delimited data value (a length followed by that many bytes).

  unsigned char *pbf_length_delimited Returns a pointer to the start of the data.

  unsigned char **ptr The pointer to read the data from, updated to point to
                      the byte following the data.

  uint32_t *length Returns the length of the data (may be NULL if not wanted).
  ++++++++++++++++++++++++++++++++++++++*/

static inline unsigned char *pbf_length_delimited(unsigned char **ptr,uint32_t *length)
{
    uint32_t nbytes=pbf_int32(ptr);
    unsigned char *payload=*ptr;   /* the data starts just after the length */

    if(length!=NULL)
        *length=nbytes;

    *ptr=payload+nbytes;

    return(payload);
}
332
333
/*++++++++++++++++++++++++++++++++++++++
  Skip any pbf field from a message.

  unsigned char **ptr The pointer to read the data from, updated to point to
                      the byte following the skipped value.

  int type The type of the data (the wire type from the field's key).
  ++++++++++++++++++++++++++++++++++++++*/

static inline void pbf_skip(unsigned char **ptr,int type)
{
    if(type==0)                 /* varint: ends at the first byte with the top bit clear */
    {
        unsigned char *p=*ptr;

        while((*p)&0x80)
            p++;

        *ptr=p+1;
    }
    else if(type==1)            /* 64-bit */
    {
        *ptr+=8;
    }
    else if(type==2)            /* length delimited */
    {
        uint32_t nbytes=pbf_int32(ptr);

        *ptr+=nbytes;
    }
    else if(type==5)            /* 32-bit */
    {
        *ptr+=4;
    }

    /* Types 3 and 4 (deprecated) and any other value: nothing is skipped */
}
368
369
370 /*++++++++++++++++++++++++++++++++++++++
371 Parse the PBF and call the functions for each OSM item as seen.
372
373 int ParsePBF Returns 0 if OK or something else in case of an error.
374
375 in fd The file descriptor of the file to parse.
376
377 int changes Set to 1 if this is a changes file otherwise 0.
378 ++++++++++++++++++++++++++++++++++++++*/
379
380 int ParsePBF(int fd,int changes)
381 {
382 int state;
383 unsigned char *error=NULL;
384
385 /* Print the initial message */
386
387 nnodes=0,nways=0,nrelations=0;
388
389 printf_first("Reading: Bytes=0 Nodes=0 Ways=0 Relations=0");
390
391 /* The actual parser. */
392
393 mode_visible=changes?MODE_MODIFY:MODE_NORMAL;
394
395 string_table_allocated=16384;
396 string_table_length=0;
397 string_table=(unsigned char **)malloc(string_table_allocated);
398 string_table_string_lengths=(uint32_t *)malloc(string_table_allocated);
399
400 zbuffer_allocated=0;
401 zbuffer=NULL;
402
403 buffer_allocated=65536;
404 buffer=(unsigned char*)malloc(buffer_allocated);
405
406 buffer_ptr=buffer_end=buffer;
407
408 while(1)
409 {
410 int32_t blob_header_length=0;
411 int osm_data=0,osm_header=0;
412 int32_t blob_length=0;
413 uint32_t raw_size=0,compressed_size=0,uncompressed_size=0;
414 unsigned char *raw_data=NULL,*zlib_data=NULL;
415 uint32_t length;
416 unsigned char *data;
417
418 /* ================ Parsing states ================ */
419
420
421 BUFFER_CHARS_EOF(4);
422
423 blob_header_length=(256*(256*(256*(int)buffer_ptr[0])+(int)buffer_ptr[1])+(int)buffer_ptr[2])+buffer_ptr[3];
424 buffer_ptr+=4;
425
426 if(blob_header_length==0 || blob_header_length>LENGTH_32M)
427 BEGIN(PBF_ERROR_BLOB_HEADER_LEN);
428
429
430 BUFFER_CHARS(blob_header_length);
431
432 osm_header=0;
433 osm_data=0;
434
435 while(buffer_ptr<buffer_end)
436 {
437 int fieldtype=pbf_int32(&buffer_ptr);
438 int field=PBF_FIELD(fieldtype);
439
440 switch(field)
441 {
442 case PBF_VAL_BLOBHEADER_TYPE: /* string */
443 {
444 uint32_t length=0;
445 unsigned char *type=NULL;
446
447 type=pbf_length_delimited(&buffer_ptr,&length);
448
449 if(length==9 && !strncmp((char*)type,"OSMHeader",9))
450 osm_header=1;
451
452 if(length==7 && !strncmp((char*)type,"OSMData",7))
453 osm_data=1;
454 }
455 break;
456
457 case PBF_VAL_BLOBHEADER_SIZE: /* int32 */
458 blob_length=pbf_int32(&buffer_ptr);
459 break;
460
461 default:
462 pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
463 }
464 }
465
466 if(blob_length==0 || blob_length>LENGTH_32M)
467 BEGIN(PBF_ERROR_BLOB_LEN);
468
469 if(!osm_data && !osm_header)
470 BEGIN(PBF_ERROR_NOT_OSM);
471
472
473 BUFFER_CHARS(blob_length);
474
475 while(buffer_ptr<buffer_end)
476 {
477 int fieldtype=pbf_int32(&buffer_ptr);
478 int field=PBF_FIELD(fieldtype);
479
480 switch(field)
481 {
482 case PBF_VAL_BLOB_RAW_DATA: /* bytes */
483 raw_data=pbf_length_delimited(&buffer_ptr,&raw_size);
484 break;
485
486 case PBF_VAL_BLOB_RAW_SIZE: /* int32 */
487 uncompressed_size=pbf_int32(&buffer_ptr);
488 break;
489
490 case PBF_VAL_BLOB_ZLIB_DATA: /* bytes */
491 zlib_data=pbf_length_delimited(&buffer_ptr,&compressed_size);
492 break;
493
494 default:
495 pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
496 }
497 }
498
499 if(raw_data && zlib_data)
500 BEGIN(PBF_ERROR_BLOB_BOTH);
501
502 if(!raw_data && !zlib_data)
503 BEGIN(PBF_ERROR_BLOB_NEITHER);
504
505 if(zlib_data)
506 {
507 #if defined(USE_GZIP) && USE_GZIP
508 int newstate=uncompress_pbf(zlib_data,compressed_size,uncompressed_size);
509
510 if(newstate)
511 BEGIN(newstate);
512 #else
513 BEGIN(PBF_ERROR_NO_GZIP);
514 #endif
515 }
516 else
517 {
518 buffer_ptr=raw_data;
519 buffer_end=raw_data+raw_size;
520 }
521
522
523 if(osm_header)
524 {
525 while(buffer_ptr<buffer_end)
526 {
527 int fieldtype=pbf_int32(&buffer_ptr);
528 int field=PBF_FIELD(fieldtype);
529
530 switch(field)
531 {
532 case PBF_VAL_REQUIRED_FEATURES: /* string */
533 {
534 uint32_t length=0;
535 unsigned char *feature=NULL;
536
537 feature=pbf_length_delimited(&buffer_ptr,&length);
538
539 if(strncmp((char*)feature,"OsmSchema-V0.6",14) &&
540 strncmp((char*)feature,"DenseNodes",10) &&
541 strncmp((char*)feature,"HistoricalInformation",21))
542 {
543 feature[length]=0;
544 error=feature;
545 BEGIN(PBF_ERROR_UNSUPPORTED);
546 }
547 }
548 break;
549
550 case PBF_VAL_OPTIONAL_FEATURES: /* string */
551 pbf_length_delimited(&buffer_ptr,NULL);
552 break;
553
554 default:
555 pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
556 }
557 }
558 }
559
560
561 if(osm_data)
562 {
563 unsigned char *primitive_group[8]={NULL};
564 uint32_t primitive_group_length[8]={0};
565 int nprimitive_groups=0,i;
566
567 granularity=100;
568 lat_offset=lon_offset=0;
569
570 while(buffer_ptr<buffer_end)
571 {
572 int fieldtype=pbf_int32(&buffer_ptr);
573 int field=PBF_FIELD(fieldtype);
574
575 switch(field)
576 {
577 case PBF_VAL_STRING_TABLE: /* bytes */
578 data=pbf_length_delimited(&buffer_ptr,&length);
579 process_string_table(data,length);
580 break;
581
582 case PBF_VAL_PRIMITIVE_GROUP: /* bytes */
583 primitive_group[nprimitive_groups]=pbf_length_delimited(&buffer_ptr,&primitive_group_length[nprimitive_groups]);
584
585 if(++nprimitive_groups>(sizeof(primitive_group)/sizeof(primitive_group[0])))
586 BEGIN(PBF_ERROR_TOO_MANY_GROUPS);
587 break;
588
589 case PBF_VAL_GRANULARITY: /* int32 */
590 granularity=pbf_int32(&buffer_ptr);
591 break;
592
593 case PBF_VAL_LAT_OFFSET: /* int64 */
594 lat_offset=pbf_int64(&buffer_ptr);
595 break;
596
597 case PBF_VAL_LON_OFFSET: /* int64 */
598 lon_offset=pbf_int64(&buffer_ptr);
599 break;
600
601 default:
602 pbf_skip(&buffer_ptr,PBF_TYPE(fieldtype));
603 }
604 }
605
606 if(nprimitive_groups)
607 for(i=0;i<nprimitive_groups;i++)
608 process_primitive_group(primitive_group[i],primitive_group_length[i]);
609 }
610 }
611
612
613 finish_parsing:
614
615 switch(state)
616 {
617 /* End of file */
618
619 case PBF_EOF:
620 break;
621
622
623 /* ================ Error states ================ */
624
625
626 case PBF_ERROR_UNEXP_EOF:
627 fprintf(stderr,"PBF Parser: Error at byte %llu: unexpected end of file seen.\n",byteno);
628 break;
629
630 case PBF_ERROR_BLOB_HEADER_LEN:
631 fprintf(stderr,"PBF Parser: Error at byte %llu: BlobHeader length is wrong (0<x<=32M).\n",byteno);
632 break;
633
634 case PBF_ERROR_BLOB_LEN:
635 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob length is wrong (0<x<=32M).\n",byteno);
636 break;
637
638 case PBF_ERROR_NOT_OSM:
639 fprintf(stderr,"PBF Parser: Error at byte %llu: BlobHeader is neither 'OSMData' or 'OSMHeader'.\n",byteno);
640 break;
641
642 case PBF_ERROR_BLOB_BOTH:
643 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob has both zlib compressed and raw uncompressed data.\n",byteno);
644 break;
645
646 case PBF_ERROR_BLOB_NEITHER:
647 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob has neither zlib compressed or raw uncompressed data.\n",byteno);
648 break;
649
650 case PBF_ERROR_NO_GZIP:
651 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob is compressed but no gzip support is available.\n",byteno);
652 break;
653
654 case PBF_ERROR_GZIP_INIT:
655 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob is compressed but failed to initialise decompression.\n",byteno);
656 break;
657
658 case PBF_ERROR_GZIP_INFLATE:
659 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob is compressed but failed to uncompress it.\n",byteno);
660 break;
661
662 case PBF_ERROR_GZIP_WRONG_LEN:
663 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob is compressed and wrong size when uncompressed.\n",byteno);
664 break;
665
666 case PBF_ERROR_GZIP_END:
667 fprintf(stderr,"PBF Parser: Error at byte %llu: Blob is compressed but failed to finalise decompression.\n",byteno);
668 break;
669
670 case PBF_ERROR_UNSUPPORTED:
671 fprintf(stderr,"PBF Parser: Error at byte %llu: Unsupported required feature '%s'.\n",byteno,error);
672 break;
673
674 case PBF_ERROR_TOO_MANY_GROUPS:
675 fprintf(stderr,"PBF Parser: Error at byte %llu: OsmData message contains too many PrimitiveGroup messages.\n",byteno);
676 break;
677 }
678
679 /* Free the parser variables */
680
681 free(string_table);
682 free(string_table_string_lengths);
683
684 free(buffer);
685 if(zbuffer)
686 free(zbuffer);
687
688 /* Print the final message */
689
690 printf_last("Read: Bytes=%llu Nodes=%"Pindex_t" Ways=%"Pindex_t" Relations=%"Pindex_t,byteno,nnodes,nways,nrelations);
691
692 return(state);
693 }
694
695
696 /*++++++++++++++++++++++++++++++++++++++
697 Process a PBF StringTable message.
698
699 unsigned char *data The data to process.
700
701 uint32_t length The length of the data.
702 ++++++++++++++++++++++++++++++++++++++*/
703
704 static void process_string_table(unsigned char *data,uint32_t length)
705 {
706 unsigned char *end=data+length;
707 unsigned char *string;
708 uint32_t string_length;
709
710 string_table_length=0;
711
712 while(data<end)
713 {
714 int fieldtype=pbf_int32(&data);
715 int field=PBF_FIELD(fieldtype);
716
717 switch(field)
718 {
719 case PBF_VAL_STRING: /* string */
720 string=pbf_length_delimited(&data,&string_length);
721
722 if(string_table_length==string_table_allocated)
723 {
724 string_table=(unsigned char **)realloc(string_table,string_table_allocated+=8192);
725 string_table_string_lengths=(uint32_t *)realloc(string_table_string_lengths,string_table_allocated);
726 }
727
728 string_table[string_table_length]=string;
729 string_table_string_lengths[string_table_length]=string_length;
730
731 string_table_length++;
732 break;
733
734 default:
735 pbf_skip(&data,PBF_TYPE(fieldtype));
736 }
737 }
738 }
739
740
741 /*++++++++++++++++++++++++++++++++++++++
742 Process a PBF PrimitiveGroup message.
743
744 unsigned char *data The data to process.
745
746 uint32_t length The length of the data.
747 ++++++++++++++++++++++++++++++++++++++*/
748
749 static void process_primitive_group(unsigned char *data,uint32_t length)
750 {
751 unsigned char *end=data+length;
752 unsigned char *subdata;
753 uint32_t sublength;
754 int i;
755
756 /* Fixup the strings (not null terminated in buffer) */
757
758 for(i=0;i<string_table_length;i++)
759 string_table[i][string_table_string_lengths[i]]=0;
760
761
762 while(data<end)
763 {
764 int fieldtype=pbf_int32(&data);
765 int field=PBF_FIELD(fieldtype);
766
767 switch(field)
768 {
769 case PBF_VAL_NODES: /* message */
770 subdata=pbf_length_delimited(&data,&sublength);
771 process_nodes(subdata,sublength);
772 break;
773
774 case PBF_VAL_DENSE_NODES: /* message */
775 subdata=pbf_length_delimited(&data,&sublength);
776 process_dense_nodes(subdata,sublength);
777 break;
778
779 case PBF_VAL_WAYS: /* message */
780 subdata=pbf_length_delimited(&data,&sublength);
781 process_ways(subdata,sublength);
782 break;
783
784 case PBF_VAL_RELATIONS: /* message */
785 subdata=pbf_length_delimited(&data,&sublength);
786 process_relations(subdata,sublength);
787 break;
788
789 default:
790 pbf_skip(&data,PBF_TYPE(fieldtype));
791 }
792 }
793 }
794
795
796 /*++++++++++++++++++++++++++++++++++++++
797 Process a PBF Node message.
798
799 unsigned char *data The data to process.
800
801 uint32_t length The length of the data.
802 ++++++++++++++++++++++++++++++++++++++*/
803
804 static void process_nodes(unsigned char *data,uint32_t length)
805 {
806 unsigned char *end=data+length;
807 int64_t id=0;
808 node_t node_id;
809 unsigned char *keys=NULL,*vals=NULL,*infos=NULL;
810 unsigned char *keys_end=NULL,*vals_end=NULL;
811 uint32_t keylen=0,vallen=0,infolen;
812 int64_t lat=0,lon=0;
813 TagList *tags=NULL,*result=NULL;
814 int visible=1;
815
816 visibles=NULL;
817
818 while(data<end)
819 {
820 int fieldtype=pbf_int32(&data);
821 int field=PBF_FIELD(fieldtype);
822
823 switch(field)
824 {
825 case PBF_VAL_NODE_ID: /* sint64 */
826 id=pbf_sint64(&data);
827 break;
828
829 case PBF_VAL_NODE_KEYS: /* packed int32 */
830 keys=pbf_length_delimited(&data,&keylen);
831 keys_end=keys+keylen;
832 break;
833
834 case PBF_VAL_NODE_VALS: /* packed int32 */
835 vals=pbf_length_delimited(&data,&vallen);
836 vals_end=vals+vallen;
837 break;
838
839 case PBF_VAL_NODE_INFO: /* message */
840 infos=pbf_length_delimited(&data,&infolen);
841 process_info(infos,infolen);
842 break;
843
844 case PBF_VAL_NODE_LAT: /* sint64 */
845 lat=pbf_sint64(&data);
846 break;
847
848 case PBF_VAL_NODE_LON: /* sint64 */
849 lon=pbf_sint64(&data);
850 break;
851
852 default:
853 pbf_skip(&data,PBF_TYPE(fieldtype));
854 }
855 }
856
857 /* Mangle the data and send it to the OSM parser */
858
859 nnodes++;
860
861 if(!(nnodes%10000))
862 printf_middle("Reading: Bytes=%llu Nodes=%"Pindex_t" Ways=%"Pindex_t" Relations=%"Pindex_t,byteno,nnodes,nways,nrelations);
863
864 node_id=(node_t)id;
865 logassert((long long)node_id==id,"Node ID too large (change node_t to 64-bits?)"); /* check node id can be stored in node_t data type. */
866
867 tags=NewTagList();
868
869 if(keys && vals)
870 {
871 while(keys<keys_end && vals<vals_end)
872 {
873 uint32_t key=pbf_int32(&keys);
874 uint32_t val=pbf_int32(&vals);
875
876 AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
877 }
878 }
879
880 if(visibles)
881 visible=pbf_int32(&visibles);
882
883 result=ApplyNodeTaggingRules(tags,node_id);
884
885 ProcessNodeTags(result,node_id,PBF_LATITUDE(lat),PBF_LONGITUDE(lon),visible?mode_visible:MODE_DELETE);
886
887 DeleteTagList(tags);
888 DeleteTagList(result);
889 }
890
891
892 /*++++++++++++++++++++++++++++++++++++++
893 Process a PBF DenseNode message.
894
895 unsigned char *data The data to process.
896
897 uint32_t length The length of the data.
898 ++++++++++++++++++++++++++++++++++++++*/
899
900 static void process_dense_nodes(unsigned char *data,uint32_t length)
901 {
902 unsigned char *end=data+length;
903 unsigned char *ids=NULL,*infos,*keys_vals=NULL,*lats=NULL,*lons=NULL;
904 unsigned char *ids_end=NULL;
905 uint32_t idlen=0,infolen=0;
906 int64_t id=0;
907 node_t node_id;
908 int64_t lat=0,lon=0;
909 TagList *tags=NULL,*result;
910 int visible=1;
911
912 visibles=NULL;
913
914 while(data<end)
915 {
916 int fieldtype=pbf_int32(&data);
917 int field=PBF_FIELD(fieldtype);
918
919 switch(field)
920 {
921 case PBF_VAL_DENSE_NODE_ID: /* packed sint64 */
922 ids=pbf_length_delimited(&data,&idlen);
923 ids_end=ids+idlen;
924 break;
925
926 case PBF_VAL_DENSE_NODE_INFO: /* message */
927 infos=pbf_length_delimited(&data,&infolen);
928 process_dense_info(infos,infolen);
929 break;
930
931 case PBF_VAL_DENSE_NODE_LAT: /* packed sint64 */
932 lats=pbf_length_delimited(&data,NULL);
933 break;
934
935 case PBF_VAL_DENSE_NODE_LON: /* packed sint64 */
936 lons=pbf_length_delimited(&data,NULL);
937 break;
938
939 case PBF_VAL_DENSE_NODE_KEYS_VALS: /* packed int32 */
940 keys_vals=pbf_length_delimited(&data,NULL);
941 break;
942
943 default:
944 pbf_skip(&data,PBF_TYPE(fieldtype));
945 }
946 }
947
948 while(ids<ids_end)
949 {
950 int64_t delta_id;
951 int64_t delta_lat,delta_lon;
952
953 delta_id=pbf_sint64(&ids);
954 delta_lat=pbf_sint64(&lats);
955 delta_lon=pbf_sint64(&lons);
956
957 id+=delta_id;
958 lat+=delta_lat;
959 lon+=delta_lon;
960
961 /* Mangle the data and send it to the OSM parser */
962
963 nnodes++;
964
965 if(!(nnodes%10000))
966 printf_middle("Reading: Bytes=%llu Nodes=%"Pindex_t" Ways=%"Pindex_t" Relations=%"Pindex_t,byteno,nnodes,nways,nrelations);
967
968 node_id=(node_t)id;
969 logassert((long long)node_id==id,"Node ID too large (change node_t to 64-bits?)"); /* check node id can be stored in node_t data type. */
970
971 tags=NewTagList();
972
973 if(keys_vals)
974 {
975 while(1)
976 {
977 uint32_t key=pbf_int32(&keys_vals),val;
978
979 if(key==0)
980 break;
981
982 val=pbf_int32(&keys_vals);
983
984 AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
985 }
986 }
987
988 if(visibles)
989 visible=pbf_int32(&visibles);
990
991 result=ApplyNodeTaggingRules(tags,node_id);
992
993 ProcessNodeTags(result,node_id,PBF_LATITUDE(lat),PBF_LONGITUDE(lon),visible?mode_visible:MODE_DELETE);
994
995 DeleteTagList(tags);
996 DeleteTagList(result);
997 }
998 }
999
1000
1001 /*++++++++++++++++++++++++++++++++++++++
1002 Process a PBF Way message.
1003
1004 unsigned char *data The data to process.
1005
1006 uint32_t length The length of the data.
1007 ++++++++++++++++++++++++++++++++++++++*/
1008
1009 static void process_ways(unsigned char *data,uint32_t length)
1010 {
1011 unsigned char *end=data+length;
1012 int64_t id=0;
1013 way_t way_id;
1014 unsigned char *keys=NULL,*vals=NULL,*infos=NULL,*refs=NULL;
1015 unsigned char *keys_end=NULL,*vals_end=NULL,*refs_end=NULL;
1016 uint32_t keylen=0,vallen=0,infolen=0,reflen=0;
1017 int64_t ref=0;
1018 TagList *tags=NULL,*result;
1019 int visible=1;
1020
1021 visibles=NULL;
1022
1023 while(data<end)
1024 {
1025 int fieldtype=pbf_int32(&data);
1026 int field=PBF_FIELD(fieldtype);
1027
1028 switch(field)
1029 {
1030 case PBF_VAL_WAY_ID: /* int64 */
1031 id=pbf_int64(&data);
1032 break;
1033
1034 case PBF_VAL_WAY_KEYS: /* packed int32 */
1035 keys=pbf_length_delimited(&data,&keylen);
1036 keys_end=keys+keylen;
1037 break;
1038
1039 case PBF_VAL_WAY_VALS: /* packed int32 */
1040 vals=pbf_length_delimited(&data,&vallen);
1041 vals_end=vals+vallen;
1042 break;
1043
1044 case PBF_VAL_WAY_INFO: /* message */
1045 infos=pbf_length_delimited(&data,&infolen);
1046 process_info(infos,infolen);
1047 break;
1048
1049 case PBF_VAL_WAY_REFS: /* packed sint64 */
1050 refs=pbf_length_delimited(&data,&reflen);
1051 refs_end=refs+reflen;
1052 break;
1053
1054 default:
1055 pbf_skip(&data,PBF_TYPE(fieldtype));
1056 }
1057 }
1058
1059 /* Mangle the data and send it to the OSM parser */
1060
1061 nways++;
1062
1063 if(!(nways%1000))
1064 printf_middle("Reading: Bytes=%llu Nodes=%"Pindex_t" Ways=%"Pindex_t" Relations=%"Pindex_t,byteno,nnodes,nways,nrelations);
1065
1066 way_id=(way_t)id;
1067 logassert((long long)way_id==id,"Way ID too large (change way_t to 64-bits?)"); /* check way id can be stored in way_t data type. */
1068
1069 tags=NewTagList();
1070
1071 if(keys && vals)
1072 {
1073 while(keys<keys_end && vals<vals_end)
1074 {
1075 uint32_t key=pbf_int32(&keys);
1076 uint32_t val=pbf_int32(&vals);
1077
1078 AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
1079 }
1080 }
1081
1082 osmparser_way_nnodes=0;
1083
1084 if(refs)
1085 while(refs<refs_end)
1086 {
1087 int64_t delta_ref;
1088 node_t node_id;
1089
1090 delta_ref=pbf_sint64(&refs);
1091
1092 ref+=delta_ref;
1093
1094 if(ref==0)
1095 break;
1096
1097 if(osmparser_way_nnodes && (osmparser_way_nnodes%256)==0)
1098 osmparser_way_nodes=(node_t*)realloc((void*)osmparser_way_nodes,(osmparser_way_nnodes+256)*sizeof(node_t));
1099
1100 node_id=(node_t)ref;
1101 logassert((long long)node_id==ref,"Node ID too large (change node_t to 64-bits?)"); /* check node id can be stored in node_t data type. */
1102
1103 osmparser_way_nodes[osmparser_way_nnodes++]=node_id;
1104 }
1105
1106 if(visibles)
1107 visible=pbf_int32(&visibles);
1108
1109 result=ApplyWayTaggingRules(tags,way_id);
1110
1111 ProcessWayTags(result,way_id,visible?mode_visible:MODE_DELETE);
1112
1113 DeleteTagList(tags);
1114 DeleteTagList(result);
1115 }
1116
1117
1118 /*++++++++++++++++++++++++++++++++++++++
1119 Process a PBF Relation message.
1120
1121 unsigned char *data The data to process.
1122
1123 uint32_t length The length of the data.
1124 ++++++++++++++++++++++++++++++++++++++*/
1125
1126 static void process_relations(unsigned char *data,uint32_t length)
1127 {
1128 unsigned char *end=data+length;
1129 int64_t id=0;
1130 relation_t relation_id;
1131 unsigned char *keys=NULL,*vals=NULL,*infos=NULL,*roles=NULL,*memids=NULL,*types=NULL;
1132 unsigned char *keys_end=NULL,*vals_end=NULL,*memids_end=NULL,*types_end=NULL;
1133 uint32_t keylen=0,vallen=0,infolen=0,rolelen=0,memidlen=0,typelen=0;
1134 int64_t memid=0;
1135 TagList *tags=NULL,*result;
1136 int visible=1;
1137
1138 visibles=NULL;
1139
1140 while(data<end)
1141 {
1142 int fieldtype=pbf_int32(&data);
1143 int field=PBF_FIELD(fieldtype);
1144
1145 switch(field)
1146 {
1147 case PBF_VAL_RELATION_ID: /* int64 */
1148 id=pbf_int64(&data);
1149 break;
1150
1151 case PBF_VAL_RELATION_KEYS: /* packed string */
1152 keys=pbf_length_delimited(&data,&keylen);
1153 keys_end=keys+keylen;
1154 break;
1155
1156 case PBF_VAL_RELATION_VALS: /* packed string */
1157 vals=pbf_length_delimited(&data,&vallen);
1158 vals_end=vals+vallen;
1159 break;
1160
1161 case PBF_VAL_RELATION_INFO: /* message */
1162 infos=pbf_length_delimited(&data,&infolen);
1163 process_info(infos,infolen);
1164 break;
1165
1166 case PBF_VAL_RELATION_ROLES: /* packed int32 */
1167 roles=pbf_length_delimited(&data,&rolelen);
1168 break;
1169
1170 case PBF_VAL_RELATION_MEMIDS: /* packed sint64 */
1171 memids=pbf_length_delimited(&data,&memidlen);
1172 memids_end=memids+memidlen;
1173 break;
1174
1175 case PBF_VAL_RELATION_TYPES: /* packed enum */
1176 types=pbf_length_delimited(&data,&typelen);
1177 types_end=types+typelen;
1178 break;
1179
1180 default:
1181 pbf_skip(&data,PBF_TYPE(fieldtype));
1182 }
1183 }
1184
1185 /* Mangle the data and send it to the OSM parser */
1186
1187 nrelations++;
1188
1189 if(!(nrelations%1000))
1190 printf_middle("Reading: Bytes=%llu Nodes=%"Pindex_t" Ways=%"Pindex_t" Relations=%"Pindex_t,byteno,nnodes,nways,nrelations);
1191
1192 relation_id=(relation_t)id;
1193 logassert((long long)relation_id==id,"Relation ID too large (change relation_t to 64-bits?)"); /* check relation id can be stored in relation_t data type. */
1194
1195 osmparser_relation_nnodes=osmparser_relation_nways=osmparser_relation_nrelations=0;
1196
1197 osmparser_relation_from=NO_WAY_ID;
1198 osmparser_relation_to=NO_WAY_ID;
1199 osmparser_relation_via=NO_NODE_ID;
1200
1201 tags=NewTagList();
1202
1203 if(keys && vals)
1204 {
1205 while(keys<keys_end && vals<vals_end)
1206 {
1207 uint32_t key=pbf_int32(&keys);
1208 uint32_t val=pbf_int32(&vals);
1209
1210 AppendTag(tags,(char*)string_table[key],(char*)string_table[val]);
1211 }
1212 }
1213
1214 if(memids && types)
1215 while(memids<memids_end && types<types_end)
1216 {
1217 int64_t delta_memid;
1218 unsigned char *role=NULL;
1219 int type;
1220
1221 delta_memid=pbf_sint64(&memids);
1222 type=pbf_int32(&types);
1223
1224 if(roles)
1225 role=string_table[pbf_int32(&roles)];
1226
1227 memid+=delta_memid;
1228
1229 if(type==0)
1230 {
1231 node_t node_id;
1232
1233 node_id=(node_t)memid;
1234 logassert((long long)node_id==memid,"Node ID too large (change node_t to 64-bits?)"); /* check node id can be stored in node_t data type. */
1235
1236 if(osmparser_relation_nnodes && (osmparser_relation_nnodes%256)==0)
1237 osmparser_relation_nodes=(node_t*)realloc((void*)osmparser_relation_nodes,(osmparser_relation_nnodes+256)*sizeof(node_t));
1238
1239 osmparser_relation_nodes[osmparser_relation_nnodes++]=node_id;
1240
1241 if(role)
1242 {
1243 if(!strcmp((char*)role,"via"))
1244 osmparser_relation_via=node_id;
1245 }
1246 }
1247 else if(type==1)
1248 {
1249 way_t way_id;
1250
1251 way_id=(way_t)memid;
1252 logassert((long long)way_id==memid,"Way ID too large (change way_t to 64-bits?)"); /* check way id can be stored in way_t data type. */
1253
1254 if(osmparser_relation_nways && (osmparser_relation_nways%256)==0)
1255 osmparser_relation_ways=(way_t*)realloc((void*)osmparser_relation_ways,(osmparser_relation_nways+256)*sizeof(way_t));
1256
1257 osmparser_relation_ways[osmparser_relation_nways++]=way_id;
1258
1259 if(role)
1260 {
1261 if(!strcmp((char*)role,"from"))
1262 osmparser_relation_from=way_id;
1263 if(!strcmp((char*)role,"to"))
1264 osmparser_relation_to=way_id;
1265 }
1266 }
1267 else if(type==2)
1268 {
1269 relation_t relation_id;
1270
1271 relation_id=(relation_t)memid;
1272 logassert((long long)relation_id==memid,"Relation ID too large (change relation_t to 64-bits?)"); /* check relation id can be stored in relation_t data type. */
1273
1274 if(osmparser_relation_nrelations && (osmparser_relation_nrelations%256)==0)
1275 osmparser_relation_relations=(relation_t*)realloc((void*)osmparser_relation_relations,(osmparser_relation_nrelations+256)*sizeof(relation_t));
1276
1277 osmparser_relation_relations[osmparser_relation_nrelations++]=relation_id;
1278 }
1279 }
1280
1281 if(visibles)
1282 visible=pbf_int32(&visibles);
1283
1284 result=ApplyRelationTaggingRules(tags,relation_id);
1285
1286 ProcessRelationTags(result,relation_id,visible?mode_visible:MODE_DELETE);
1287
1288 DeleteTagList(tags);
1289 DeleteTagList(result);
1290 }
1291
1292
1293 /*++++++++++++++++++++++++++++++++++++++
1294 Process a PBF Info message.
1295
1296 unsigned char *data The data to process.
1297
1298 uint32_t length The length of the data.
1299 ++++++++++++++++++++++++++++++++++++++*/
1300
1301 static void process_info(unsigned char *data,uint32_t length)
1302 {
1303 unsigned char *end=data+length;
1304
1305 while(data<end)
1306 {
1307 int fieldtype=pbf_int32(&data);
1308 int field=PBF_FIELD(fieldtype);
1309
1310 switch(field)
1311 {
1312 case PBF_VAL_VISIBLE: /* bool */
1313 visibles=data;
1314 pbf_int32(&data);
1315 break;
1316
1317 default:
1318 pbf_skip(&data,PBF_TYPE(fieldtype));
1319 }
1320 }
1321 }
1322
1323
1324 /*++++++++++++++++++++++++++++++++++++++
1325 Process a PBF DenseInfo message.
1326
1327 unsigned char *data The data to process.
1328
1329 uint32_t length The length of the data.
1330 ++++++++++++++++++++++++++++++++++++++*/
1331
1332 static void process_dense_info(unsigned char *data,uint32_t length)
1333 {
1334 unsigned char *end=data+length;
1335
1336 while(data<end)
1337 {
1338 int fieldtype=pbf_int32(&data);
1339 int field=PBF_FIELD(fieldtype);
1340
1341 switch(field)
1342 {
1343 case PBF_VAL_VISIBLE: /* packed bool */
1344 visibles=pbf_length_delimited(&data,NULL);
1345 break;
1346
1347 default:
1348 pbf_skip(&data,PBF_TYPE(fieldtype));
1349 }
1350 }
1351 }
1352
1353
1354 #if defined(USE_GZIP) && USE_GZIP
1355
1356 /*++++++++++++++++++++++++++++++++++++++
1357 Uncompress the part of the PBF data that is compressed.
1358
1359 int uncompress_pbf Returns the error state or 0 if OK.
1360
1361 unsigned char *data The data to uncompress.
1362
1363 uint32_t compressed The number of bytes to uncompress.
1364
1365 uint32_t uncompressed The number of bytes expected when uncompressed.
1366 ++++++++++++++++++++++++++++++++++++++*/
1367
1368 static int uncompress_pbf(unsigned char *data,uint32_t compressed,uint32_t uncompressed)
1369 {
1370 z_stream z={0};
1371
1372 if(uncompressed>zbuffer_allocated)
1373 zbuffer=(unsigned char *)realloc(zbuffer,zbuffer_allocated=uncompressed);
1374
1375 if(inflateInit2(&z,15+32)!=Z_OK)
1376 return(PBF_ERROR_GZIP_INIT);
1377
1378 z.next_in=data;
1379 z.avail_in=compressed;
1380
1381 z.next_out=zbuffer;
1382 z.avail_out=uncompressed;
1383
1384 if(inflate(&z,Z_FINISH)!=Z_STREAM_END)
1385 return(PBF_ERROR_GZIP_INFLATE);
1386
1387 if(z.avail_out!=0)
1388 return(PBF_ERROR_GZIP_WRONG_LEN);
1389
1390 if(inflateEnd(&z)!=Z_OK)
1391 return(PBF_ERROR_GZIP_END);
1392
1393 buffer_ptr=zbuffer;
1394 buffer_end=zbuffer+uncompressed;
1395
1396 return(0);
1397 }
1398
1399 #endif /* USE_GZIP */