Instantly share code, notes, and snippets.
Last active
August 10, 2021 13:04
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save kenji4569/7e11c73894ba925f2d6d43727f442d3e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "72391778", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"--- node tags ---\n", | |
"[(\"name\", 337484), (\"source\", 213180), (\"highway\", 185851), (\"amenity\", 163303), (\"name:en\", 103501), (\"name:ja\", 74679), (\"shop\", 60935), (\"operator\", 56152), (\"public_transport\", 52231), (\"natural\", 46208), (\"power\", 42221), (\"note\", 41456), (\"bus\", 39772), (\"source_ref\", 38462), (\"crossing\", 32619), (\"railway\", 32216), (\"place\", 31430), (\"brand\", 31110), (\"barrier\", 29033), (\"note:ja\", 28777), (\"brand:wikidata\", 25806), (\"cuisine\", 23996), (\"ref\", 23961), (\"brand:en\", 23766), (\"brand:ja\", 23382), (\"brand:wikipedia\", 22984), (\"name:ja_rm\", 22925), (\"opening_hours\", 20177), (\"tourism\", 19656), (\"name:ja-Hira\", 19612), (\"level\", 18624), (\"website\", 17065), (\"KSJ2:ADS\", 16182), (\"KSJ2:PubFacAdmin\", 16181), (\"phone\", 16007), (\"branch\", 15789), (\"addr:postcode\", 13339), (\"wheelchair\", 12812), (\"name:ja_kana\", 11452), (\"name:ko\", 11162), (\"information\", 11027), (\"traffic_signals\", 10694), (\"addr:housenumber\", 10547), (\"wikidata\", 9829), (\"emergency\", 9720), (\"addr:city\", 9617), (\"KSJ2:AdminArea\", 9382), (\"religion\", 8930), (\"wikipedia\", 8319), (\"addr:quarter\", 8253), (\"addr:province\", 8224), (\"office\", 8200), (\"healthcare\", 8131), (\"historic\", 8114), (\"vending\", 7285), (\"KSJ2:curve_id\", 7115), (\"noexit\", 7084), (\"addr:full\", 6618), (\"shelter\", 6554), (\"takeaway\", 6445), (\"addr:neighbourhood\", 6300), (\"addr:block_number\", 5966), (\"social_facility\", 5770), (\"bicycle\", 5666), (\"KSJ2:filename\", 5598), (\"leisure\", 5397), (\"man_made\", 5299), (\"denotation\", 5235), (\"leaf_type\", 5200), (\"entrance\", 5126), (\"official_name:en\", 4921), (\"bench\", 4791), (\"direction\", 4766), (\"created_by\", 4757), (\"name:ja-Latn\", 4697), (\"internet_access\", 4686), (\"description\", 4273), (\"material\", 4232), (\"access\", 4229), (\"backrest\", 3797), (\"train\", 3789), (\"leaf_cycle\", 3668), (\"atm\", 3503), (\"name:es\", 3463), (\"covered\", 3462), (\"local_ref\", 3321), (\"network\", 3245), (\"foot\", 3120), (\"fee\", 3115)]\n", | |
"\n", | |
"--- way tags ---\n", | |
"[(\"building\", 3212646), (\"source\", 2575111), (\"highway\", 1822317), (\"yh:WIDTH\", 321061), (\"name\", 273654), (\"source_ref\", 193750), (\"surface\", 155667), (\"landuse\", 150326), (\"yh:TYPE\", 135061), (\"yh:STRUCTURE\", 135060), (\"yh:TOTYUMONO\", 124827), (\"yh:WIDTH_RANK\", 124806), (\"oneway\", 119129), (\"layer\", 119014), (\"service\", 117376), (\"amenity\", 116859), (\"name:en\", 95188), (\"building:levels\", 94666), (\"name:ja\", 88999), (\"lanes\", 86388), (\"footway\", 82361), (\"natural\", 78738), (\"note\", 70762), (\"ref\", 70659), (\"bridge\", 67807), (\"waterway\", 56311), (\"leisure\", 50984), (\"maxspeed\", 50347), (\"parking\", 40504), (\"access\", 37432), (\"source:ja\", 36462), (\"note:ja\", 34493), (\"operator\", 33290), (\"railway\", 31563), (\"bicycle\", 29245), (\"foot\", 28947), (\"crossing\", 28847), (\"tunnel\", 28527), (\"voltage\", 27168), (\"KSJ2:curve_id\", 26409), (\"gauge\", 25709), (\"frequency\", 25062), (\"electrified\", 24819), (\"name:es\", 24614), (\"est_width\", 23944), (\"name:ja_rm\", 23739), (\"barrier\", 23070), (\"addr:block_number\", 20069), (\"addr:quarter\", 19876), (\"addr:neighbourhood\", 18604), (\"operator:en\", 17472), (\"tracktype\", 17152), (\"usage\", 17136), (\"official_name\", 16523), (\"operator:ja\", 15861), (\"area\", 14267), (\"addr:city\", 14074), (\"addr:housenumber\", 13701), (\"name:ja-Latn\", 13189), (\"smoothness\", 13027), (\"addr:province\", 12833), (\"toll\", 12281), (\"name:ja-Hira\", 12037), (\"addr:postcode\", 11413), (\"shop\", 11368), (\"water\", 11019), (\"motorcar\", 10743), (\"motorcycle\", 10657), (\"name:ko\", 10571), (\"roof:shape\", 10538), (\"sport\", 9846), (\"noname\", 9804), (\"wikidata\", 9778), (\"level\", 9602), (\"source:geometry\", 9383), (\"name:ja_kana\", 9380), (\"golf\", 9228), (\"man_made\", 9188), (\"admin_level\", 8907), (\"power\", 8155), (\"KSJ2:DFD\", 8109), (\"KSJ2:WSC\", 8109), (\"KSJ2:RIN\", 8109), (\"KSJ2:RIC\", 8109), (\"KSJ2:river_id\", 8109), (\"KSJ2:LOC\", 8109), (\"KSJ2:COP_label\", 8109), (\"KSJ2:filename\", 8089), (\"building:colour\", 7792), (\"operator:ja_rm\", 7700), (\"wikipedia\", 7680), (\"height\", 7632), (\"fee\", 7614), (\"incline\", 7497), (\"boundary\", 7330), (\"nat_name\", 7300), (\"roof:colour\", 7016), (\"width\", 6878), (\"brand\", 6787), (\"colour\", 6748), (\"nat_name:en\", 6701), (\"religion\", 6385), (\"lit\", 6383), (\"route\", 6227), (\"building:part\", 6025), (\"sidewalk\", 5953), (\"name:de\", 5943), (\"name:ru\", 5928), (\"brand:wikidata\", 5451), (\"passenger_lines\", 5231), (\"brand:en\", 5100), (\"brand:ja\", 4956), (\"brand:wikipedia\", 4942), (\"alt_name\", 4324), (\"tactile_paving\", 4319), (\"motor_vehicle\", 4277), (\"website\", 4217), (\"opening_hours\", 4198), (\"branch\", 4144), (\"railway:traffic_mode\", 4110), (\"covered\", 3985), (\"bridge:name\", 3871), (\"nat_name:ja\", 3735), (\"building:material\", 3711), (\"indoor\", 3638), (\"segregated\", 3566), (\"embankment\", 3492), (\"capacity\", 3368), (\"public_transport\", 3232), (\"phone\", 3216), (\"addr:suburb\", 3213), (\"aeroway\", 3132), (\"oneway:bicycle\", 3028)]\n", | |
"\n", | |
"--- relation tags ---\n", | |
"[(\"type\", 27449), (\"name\", 14859), (\"operator\", 7686), (\"ref\", 7533), (\"route\", 7126), (\"public_transport:version\", 4878), (\"network\", 4584), (\"name:en\", 4204), (\"natural\", 3512), (\"name:ja\", 3437), (\"source\", 3434), (\"restriction\", 3310), (\"building\", 3206), (\"from\", 3010)]\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"extern crate osmpbfreader;\n", | |
"use std::collections::HashMap;\n", | |
"\n", | |
"let filename = \"./kanto-latest.osm.pbf\";\n", | |
"let path = std::path::Path::new(filename);\n", | |
"let r = std::fs::File::open(&path).unwrap();\n", | |
"let mut pbf = osmpbfreader::OsmPbfReader::new(r);\n", | |
"\n", | |
"type TagMap = HashMap::<String, Vec<String>>;\n", | |
"let mut node_tags = TagMap::new();\n", | |
"let mut way_tags = TagMap::new();\n", | |
"let mut relation_tags = TagMap::new();\n", | |
"\n", | |
"for obj in pbf.par_iter().map(Result::unwrap) {\n", | |
" match obj {\n", | |
" osmpbfreader::OsmObj::Node(node) => {\n", | |
" for (k, v) in node.tags.iter() {\n", | |
" (*node_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n", | |
" }\n", | |
" }\n", | |
" osmpbfreader::OsmObj::Way(way) => {\n", | |
" for (k, v) in way.tags.iter() {\n", | |
" (*way_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n", | |
" }\n", | |
" }\n", | |
" osmpbfreader::OsmObj::Relation(rel) => {\n", | |
" for (k, v) in rel.tags.iter() {\n", | |
" (*relation_tags.entry(k.to_string()).or_insert(vec![])).push(v.to_string());\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
"};\n", | |
"\n", | |
"fn select_tags(tags: &TagMap, max_items: usize) -> Vec::<(String, usize)> {\n", | |
" let mut filtered_tags = tags.iter().filter_map(|(k, v)| {\n", | |
" if v.len() > max_items { Some((k.clone(), v.len())) } else { None }\n", | |
" }).collect::<Vec::<(String, usize)>>();\n", | |
" filtered_tags.sort_by(|(_k1, v1), (_k2, v2)| v2.cmp(v1));\n", | |
" filtered_tags\n", | |
"}\n", | |
"\n", | |
"let max_items = 3000;\n", | |
"\n", | |
"println!(\"--- node tags ---\");\n", | |
"println!(\"{:?}\", select_tags(&node_tags, max_items));\n", | |
"println!(\"\");\n", | |
"\n", | |
"println!(\"--- way tags ---\");\n", | |
"println!(\"{:?}\", select_tags(&way_tags, max_items));\n", | |
"println!(\"\");\n", | |
"\n", | |
"println!(\"--- relation tags ---\");\n", | |
"println!(\"{:?}\", select_tags(&relation_tags, max_items));\n", | |
"println!(\"\");" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "17d29cd6", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"--- values for highway node ---\n", | |
"[(\"traffic_signals\", 58610), (\"crossing\", 56701), (\"bus_stop\", 53173), (\"street_lamp\", 6934), (\"stop\", 5880), (\"turning_circle\", 1487), (\"motorway_junction\", 1241), (\"elevator\", 734), (\"traffic_mirror\", 446)]\n", | |
"\n", | |
"--- values for highway way ---\n", | |
"[(\"residential\", 597695), (\"unclassified\", 438707), (\"service\", 211148), (\"footway\", 178498), (\"track\", 107295), (\"path\", 80424), (\"tertiary\", 79179), (\"steps\", 33224), (\"primary\", 19633), (\"trunk\", 17413), (\"secondary\", 16548), (\"pedestrian\", 11553), (\"motorway\", 6665), (\"motorway_link\", 5952), (\"living_street\", 5215), (\"cycleway\", 5151), (\"trunk_link\", 2254), (\"primary_link\", 1307), (\"construction\", 1267), (\"tertiary_link\", 906), (\"secondary_link\", 581), (\"road\", 528), (\"raceway\", 246), (\"services\", 223), (\"corridor\", 157), (\"elevator\", 144), (\"platform\", 110)]\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"fn select_values(values: &Vec<String>, max_values: usize) -> Vec::<(String, usize)> {\n", | |
" let mut count_by_value = HashMap::<String, usize>::new();\n", | |
" for v in values.iter() {\n", | |
" (*count_by_value.entry(v.to_string()).or_insert(0)) += 1;\n", | |
" };\n", | |
" \n", | |
" let mut filtered_count_by_value = count_by_value.iter().filter_map(|(k, v)| {\n", | |
" if *v > max_values { Some((k.clone(), *v)) } else { None }\n", | |
" }).collect::<Vec::<(String, usize)>>();\n", | |
" filtered_count_by_value.sort_by(|(_k1, v1), (_k2, v2)| v2.cmp(v1));\n", | |
" filtered_count_by_value\n", | |
"}\n", | |
"\n", | |
"let max_values = 100;\n", | |
"\n", | |
"println!(\"--- values for highway node ---\");\n", | |
"println!(\"{:?}\", select_values(&node_tags[\"highway\"], max_values));\n", | |
"println!(\"\");\n", | |
"\n", | |
"println!(\"--- values for highway way ---\");\n", | |
"println!(\"{:?}\", select_values(&way_tags[\"highway\"], max_values));\n", | |
"println!(\"\");" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Rust", | |
"language": "rust", | |
"name": "rust" | |
}, | |
"language_info": { | |
"codemirror_mode": "rust", | |
"file_extension": ".rs", | |
"mimetype": "text/rust", | |
"name": "Rust", | |
"pygment_lexer": "rust", | |
"version": "" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment