# File: //usr/share/gems/gems/did_you_mean-1.2.0/evaluation/dictionary_generator.rb
require 'open-uri'
require 'cgi'
require 'json'
# Downloads every page title from simple.wiktionary.org through the MediaWiki
# "allpages" API (500 titles per request) and writes the de-duplicated list to
# evaluation/dictionary.yml as YAML.
require 'yaml'

per_page = 500
base_url = "https://simple.wiktionary.org/w/api.php?action=query&aplimit=#{per_page}&list=allpages&format=json"
filename = "evaluation/dictionary.yml"

apfrom = ""
num    = 0
titles = []

loop do
  url = "#{base_url}&apfrom=#{apfrom}"

  # The URL can contain percent-escapes (produced by CGI.escape below), so it
  # must be passed as a format argument; interpolating it into the format
  # string makes `%27`, `%C3`, ... be parsed as format directives and raise.
  puts format("downloading page %2d: %s", num, url)

  # URI.open is open-uri's explicit entry point (Kernel#open no longer accepts
  # URLs on Ruby 3.0+); the block form closes the response once it is read.
  body = URI.open(url, &:read)

  # JSON.parse rather than JSON.load: the payload comes from the network.
  pages = JSON.parse(body).dig("query", "allpages")
  raise "unexpected API response for #{url}: #{body[0, 200]}" unless pages

  page_titles = pages.map { |page| page["title"] }
  titles.concat(page_titles)
  num += 1

  # A short (or empty) page means the listing is exhausted.
  break if page_titles.size != per_page

  # Resume from the last title seen. `apfrom` is inclusive, so that title is
  # returned again at the start of the next page; `uniq` below drops the
  # duplicate.
  apfrom = CGI.escape(page_titles.last)
end

unique_titles = titles.uniq
File.write(filename, unique_titles.to_yaml)

puts "
Number of titles: #{unique_titles.size}
Dictionary saved: #{filename}
"