File: //usr/local/rvm/src/ruby-2.3.8/gems/did_you_mean-1.0.0/evaluation/dictionary_generator.rb
require 'open-uri'
require 'cgi'
require 'json'
# Where the generated word list is written (path is relative to the
# directory the script is run from).
filename = "evaluation/dictionary.yml"

# Number of titles requested per API call. The download loop also compares
# each page's size against this value to decide whether to keep paging.
per_page = 500
base_url = "https://simple.wiktionary.org/w/api.php" \
           "?action=query&aplimit=#{per_page}&list=allpages&format=json"

# Paging state, updated by the download loop below.
titles = []   # every title collected so far
num    = 0    # pages downloaded so far (used only for progress output)
apfrom = ""   # URL-escaped title the next request starts from
count  = nil  # number of titles returned by the most recent request
# Download the `allpages` listing one page at a time, appending every title
# to `titles`. Each request starts from the last title of the previous page
# (`apfrom`); paging stops as soon as a page comes back with fewer than
# `per_page` entries.
#
# NOTE(review): because the next request starts *from* the last title seen,
# that title presumably shows up again at the top of the following page.
# The script de-duplicates with `uniq` before saving, so this is harmless.
loop do
  url = "#{base_url}&apfrom=#{apfrom}"

  # Pass the URL as a %s argument instead of interpolating it into the
  # format string: `apfrom` is percent-escaped, and sequences such as
  # "%27e" or "%C3" would otherwise be parsed as format directives and
  # raise ArgumentError.
  puts format("downloading page %2d: %s", num, url)

  # OpenURI::OpenRead#read works on every Ruby that ships open-uri, whereas
  # opening URLs through Kernel#open was removed in Ruby 3.0.
  body = URI.parse(url).read

  # JSON.parse (not JSON.load) because the payload comes from the network.
  # `fetch` makes an unexpected response shape (e.g. an API error object)
  # fail with a KeyError naming the missing key.
  pages = JSON.parse(body).fetch("query").fetch("allpages")

  count  = pages.size
  apfrom = CGI.escape(pages.last["title"]) if count > 0
  titles.concat(pages.map { |page| page["title"] })
  num += 1

  # A short (or empty) page means the listing is exhausted.
  break unless count == per_page
end
require 'yaml'

# Write the de-duplicated titles to `filename` as a YAML sequence, then print
# a short summary (blank line, title count, output path) to stdout.
unique_titles = titles.uniq
File.write(filename, unique_titles.to_yaml)

puts "",
     "Number of titles: #{unique_titles.size}",
     "Dictionary saved: #{filename}"