Created
December 5, 2017 10:46
-
-
Save higuma/b161b262fbef83569fdc125a90a0e9c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'net/http'

# Base URL of the page directory that is probed; page file names are appended to it.
URI_BASE = 'https://www.marutsu.co.jp/contents/shop/marutsu/mame'.freeze

# File extensions tried for every candidate page number.
EXT = %w[html htm pdf].freeze

# Page numbers to probe. The upper bound is deliberately padded; the highest
# page observed so far appears to be 205.
NUMBER_RANGE = 1..220
# Issues a GET request for +uri+ and reports whether the server answered 200 OK.
#
# TLS is enabled only when the URI scheme is "https", so plain "http" URLs
# are accepted as well.
#
# @param uri [String, URI] address of the page to probe
# @return [Boolean] true when the response is Net::HTTPOK, false otherwise
#
# Connection-level errors (DNS failure, timeout, TLS errors) are not rescued
# and propagate to the caller.
#
# NOTE(review): a HEAD request would avoid downloading the body (some targets
# are PDFs); GET is kept because HEAD support on the server is unconfirmed.
def https_request(uri)
  uri = URI(uri)
  response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(Net::HTTP::Get.new(uri))
  end
  response.is_a?(Net::HTTPOK)
end
# Returns the spellings of +number+ to try as a file name stem: the plain
# decimal form plus zero-padded variants up to three digits wide.
#   8   -> ["8", "08", "008"]
#   32  -> ["32", "032"]
#   123 -> ["123"]
def candidate_indices(number)
  digits = number.to_s
  widest = [digits.length, 3].max
  (digits.length..widest).map { |width| digits.rjust(width, '0') }
end

# Probe every number / padding / extension combination and print, one line
# per page number, the file names that exist on the server. Numbers with no
# hit print nothing.
NUMBER_RANGE.each do |number|
  pages = candidate_indices(number).product(EXT).map { |index, ext| "#{index}.#{ext}" }
  exist = pages.select { |page| https_request("#{URI_BASE}/#{page}") }
  p exist unless exist.empty?
end
__END__
マルツ「パーツまめ知識」サイトのページ存在判定
サイトは旧式のスタティックページだが、パスの命名規則が統一されていない。
* 一桁の番号は"08.html"("8.html"や"008.html"はない)
* 二桁は"32.html"("032.html"はない)
* 三桁は"123.html"
* わずかだが"191.pdf"のようなPDFもある
* まれに"80.htm"もある("80.html"と同じ内容。調べた限りこれひとつだけ)
そこで、考えられる全パターンにHTTPSでアクセスして存在するかどうかを調べるスクリプトを作った。結果は次の通り。なお"80.htm"は"80.html"と全く同じなので、以降のリスト作成からは除外する。
["08.html"] | |
["09.html"] | |
["10.html"] | |
["11.html"] | |
["13.html"] | |
["14.html"] | |
["15.html"] | |
["16.html"] | |
["17.html"] | |
["18.html"] | |
["20.html"] | |
["21.html"] | |
["22.html"] | |
["23.html"] | |
["24.html"] | |
["25.html"] | |
["26.html"] | |
["27.html"] | |
["29.html"] | |
["30.html"] | |
["31.html"] | |
["32.html"] | |
["33.html"] | |
["34.html"] | |
["36.html"] | |
["38.html"] | |
["39.html"] | |
["40.html"] | |
["41.html"] | |
["42.html"] | |
["43.html"] | |
["44.html"] | |
["45.html"] | |
["46.html"] | |
["47.html"] | |
["48.html"] | |
["49.html"] | |
["50.html"] | |
["51.html"] | |
["52.html"] | |
["53.html"] | |
["54.html"] | |
["55.html"] | |
["56.html"] | |
["57.html"] | |
["58.html"] | |
["59.html"] | |
["60.html"] | |
["61.html"] | |
["62.html"] | |
["63.html"] | |
["64.html"] | |
["65.html"] | |
["66.html"] | |
["67.html"] | |
["68.html"] | |
["69.html"] | |
["70.html"] | |
["71.html"] | |
["72.html"] | |
["73.html"] | |
["74.html"] | |
["75.html"] | |
["76.html"] | |
["77.html"] | |
["78.html"] | |
["79.html"] | |
["80.html", "80.htm"] | |
["81.html"] | |
["82.html"] | |
["83.html"] | |
["84.html"] | |
["85.html"] | |
["86.html"] | |
["87.html"] | |
["88.html"] | |
["89.html"] | |
["90.html"] | |
["91.html"] | |
["92.html"] | |
["93.html"] | |
["94.html"] | |
["95.html"] | |
["96.html"] | |
["97.html"] | |
["98.html"] | |
["99.html"] | |
["100.html"] | |
["101.html"] | |
["102.html"] | |
["103.html"] | |
["104.html"] | |
["105.html"] | |
["106.html"] | |
["107.html"] | |
["108.html"] | |
["109.html"] | |
["110.html"] | |
["111.html"] | |
["112.html"] | |
["113.html"] | |
["114.html"] | |
["115.html"] | |
["116.html"] | |
["117.html"] | |
["118.html"] | |
["119.html"] | |
["120.html"] | |
["121.html"] | |
["122.html"] | |
["123.html"] | |
["124.html"] | |
["125.html"] | |
["126.html"] | |
["127.html"] | |
["128.html"] | |
["129.html"] | |
["130.html"] | |
["131.html"] | |
["132.html"] | |
["133.html"] | |
["134.html"] | |
["135.html"] | |
["136.html"] | |
["137.html"] | |
["138.html"] | |
["139.html"] | |
["140.html"] | |
["141.html"] | |
["142.html"] | |
["143.html"] | |
["144.html"] | |
["145.html"] | |
["146.html"] | |
["147.html"] | |
["148.html"] | |
["149.html"] | |
["150.html"] | |
["151.html"] | |
["152.html"] | |
["153.html"] | |
["154.html"] | |
["155.html"] | |
["156.html"] | |
["157.html"] | |
["158.html"] | |
["159.html"] | |
["160.html"] | |
["161.html"] | |
["162.html"] | |
["163.html"] | |
["164.html"] | |
["165.html"] | |
["166.html"] | |
["167.html"] | |
["168.html"] | |
["169.html"] | |
["170.html"] | |
["171.html"] | |
["172.html"] | |
["173.html"] | |
["174.html"] | |
["175.html"] | |
["176.html"] | |
["177.html"] | |
["178.html"] | |
["179.html"] | |
["180.html"] | |
["181.html"] | |
["182.html"] | |
["183.html"] | |
["184.html"] | |
["185.html"] | |
["186.html"] | |
["187.html"] | |
["188.html"] | |
["189.html"] | |
["190.html"] | |
["191.html", "191.pdf"] | |
["192.html", "192.pdf"] | |
["193.html", "193.pdf"] | |
["194.html", "194.pdf"] | |
["196.html"] | |
["197.html"] | |
["198.html"] | |
["199.html"] | |
["200.html"] | |
["201.html"] | |
["202.html"] | |
["203.html"] | |
["204.html"] | |
["205.html"] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment