#!/usr/bin/ruby
#
# Feeds every file named on the command line through ParseVolc1.read,
# normalizes a few columns of each row and stores the rows in the
# volc1raw table of the SQLite database file wmo9.sqlite.
#
# usage: <script> [-k] [-c3000] volc_files ...
#   -k    keep the rows already in volc1raw (by default the table is emptied)
#   -cN   number of rows written per transaction (default 3000)

require 'rubygems'
require 'sqlite3'
require 'parsevolc'

# Normalizes rows yielded by ParseVolc1.read and inserts them into volc1raw.
# Rows are accessed with string keys ('date', 'rth', 'centre', ...).
class Loader
  # Every full hour of the day, used to expand "hourly" style time groups.
  ALL_HOURS = (0..23)

  # FIXME(review): the pattern of this branch is missing from the source this
  # file was restored from (only a bare "%r" survived, presumably a %r<...>
  # literal whose body was stripped). /CAIRO/ is a placeholder guess based on
  # the replacement value 'CAIRO' -- confirm against the original before use.
  CAIRO_CENTRE = /CAIRO/

  # Parameterized INSERT; the named placeholders are bound from the row hash.
  # The date text is converted to a Julian day number by SQLite itself.
  INSERT_SQL = <<-ENDSQL
    INSERT INTO volc1raw(
      region, rth, country, centre, date, category,
      ttaaii, cccc, codeform, timegroup, content, remarks
    ) VALUES (
      :region, :rth, :country, :centre, JULIANDAY(:date), :category,
      :ttaaii, :cccc, :codeform, :timegroup, :content, :remarks
    )
  ENDSQL

  # count: rows per transaction; keep: do not empty the table in mkdb.
  attr_writer :count, :keep

  # Opens (or creates) wmo9.sqlite in the current directory.
  def initialize
    @db = SQLite3::Database.new('wmo9.sqlite')
    @count = 3000
    @keep = false
  end

  # Closes the database handle.
  def close
    @db.close
  end

  # Creates table volc1raw and its (ttaaii, cccc) index when missing, then
  # deletes all existing rows unless keep was set.
  def mkdb
    @db.execute_batch(<<-ENDSQL)
      CREATE TABLE IF NOT EXISTS volc1raw(
        region TEXT NOT NULL,
        rth TEXT NOT NULL,
        country TEXT NOT NULL,
        centre TEXT NOT NULL,
        date DOUBLE,
        category TEXT,
        ttaaii TEXT NOT NULL,
        cccc TEXT NOT NULL,
        codeform TEXT,
        timegroup TEXT,
        content TEXT,
        remarks TEXT
      );
      CREATE INDEX IF NOT EXISTS volc1rawahl ON volc1raw (ttaaii, cccc);
    ENDSQL
    @db.execute("DELETE FROM volc1raw") unless @keep
  end

  # Rewrites row['date'] into a form JULIANDAY() can read.
  #
  # * "NN/NN/YYYY" becomes "YYYY-MM-DD". The pattern admits 00-39 for the
  #   first field and 00-19 for the second, i.e. day/month, so the month
  #   (second field) is emitted before the day (first field).
  #   NOTE(review): input order inferred from the pattern only -- confirm
  #   against real data.
  # * "YYYY-MM-DDThh:mm:ss" is kept as is.
  # * '' and nil become nil.
  #
  # Raises RuntimeError for anything else.
  def fixdate(row)
    row['date'] =
      case row['date']
      when /^([0123][0-9])\/([01][0-9])\/([12][09]\d\d)$/
        "#{$3}-#{$2}-#{$1}"
      when /^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d$/
        $&
      when '', nil
        nil
      else
        raise "malformatted date (#{row['date']})"
      end
  end

  # Fills in a missing row['rth'] from row['cccc'].
  # Only EDZW and EUMS are known (both map to 'OFFENBACH'); any other cccc
  # on a row without rth raises RuntimeError.
  def fixrth(row)
    return if row['rth']
    row['rth'] =
      case row['cccc']
      when /^(EDZW|EUMS)$/
        'OFFENBACH'
      else
        raise "unknown cccc=#{row['cccc']} for rth-assignment"
      end
  end

  # Normalizes row['centre']: values matching CAIRO_CENTRE become 'CAIRO';
  # for values containing " IN REGION <roman numeral>:" only the text after
  # that marker is kept. Other values are left untouched.
  def fixcentre(row)
    case row['centre']
    when CAIRO_CENTRE
      row['centre'] = 'CAIRO'
    when /\sIN REGION [IV]+:\s*/
      row['centre'] = $'
    end
  end

  # Collapses every country starting with "ANTARCTIC - " to 'ANTARCTIC'.
  def fixcountry(row)
    case row['country']
    when /^ANTARCTIC - /
      row['country'] = 'ANTARCTIC'
    end
  end

  # Rewrites row['timegroup'] into a comma separated list of "hh" or "hhmm"
  # items. Recognized spellings (anything else, including nil, is left as is):
  #
  #   "0 6 12"             -> "00,06,12"
  #   "HOURLY (00-03,12)"  -> "00,01,02,03,12"
  #   "HLY 6 TO 9"         -> "06,07,08,09"
  #   "HOURLY", "HOURLY WHEN AVAILABLE", "HLY", "H+00" -> "00,01,...,23"
  #   "HALF-HOURLY"        -> "00,0030,01,0130,..."
  #   "H+30"               -> "0030,0130,...,2330"
  #   "H+20 FOR 00,12"     -> "0020,1220"
  #   "H+20 (06-08)"       -> "0620,0720,0820"
  #   "H+20,50 (06-07)"    -> "0620,0650,0720,0750"
  #   "H+20,H+50"          -> "0020,0050,0120,0150,..."
  #
  # Raises RuntimeError for an unparsable item inside "HOURLY (...)".
  def fixtimegroup(row)
    that = row['timegroup']
    row['timegroup'] =
      case that
      when /^[.,\s]*(\d+(?:[\s.,]+\d+)*)[,\s]*$/
        # plain list of numbers; single digits get a leading zero
        $1.split(/[\s.,]+/).map { |h| h.length == 1 ? "0#{h}" : h }.join(',')
      when /^HOURLY\s*\(([-\d,]+)\)/
        expand_hour_list($1, that)
      when /^HLY (\d+) TO (\d+)$/
        (($1.to_i)..($2.to_i)).map { |h| format('%02u', h) }.join(',')
      when /^HOURLY( WHEN AVAILABLE)?$/, /^HLY$/
        ALL_HOURS.map { |h| format('%02u', h) }.join(',')
      when /^HALF[- ](HOURLY|HLY)$/
        ALL_HOURS.map { |h| format('%02u,%02u30', h, h) }.join(',')
      when /^H *\+ *00$/
        ALL_HOURS.map { |h| format('%02u', h) }.join(',')
      when /^H *\+ *(\d\d)$/
        minutes = $1
        ALL_HOURS.map { |h| format('%02u%s', h, minutes) }.join(',')
      when /^H\+(\d\d) FOR (\d\d(?:,\d\d)*)$/
        minutes = $1
        $2.split(/,/).map { |h| format('%2.2s%2.2s', h, minutes) }.join(',')
      when /^H\+(\d\d) ?\((\d\d)-(\d\d)\)$/
        minutes = $1
        (($2.to_i)..($3.to_i)).map { |h| format('%02u%2.2s', h, minutes) }.join(',')
      when /^H\+(\d\d),(\d\d) ?\((\d\d)-(\d\d)\)$/
        first, second = $1, $2
        (($3.to_i)..($4.to_i)).map { |h|
          format('%02u%2.2s,%02u%2.2s', h, first, h, second)
        }.join(',')
      when /^H\+\d\d(?:,H\+\d\d)+$/
        offsets = $&.split(/,/).map { |item| item.sub(/^H\+/, '') }
        ALL_HOURS.map { |h|
          offsets.map { |minutes| format('%02u%s', h, minutes) }
        }.flatten.join(',')
      else
        that
      end
  end

  # Drops the leading "TAF" from a content made of "TAF" followed by one or
  # more blank-separated four-letter words. The kept text still starts with
  # the blank that followed "TAF".
  def fixcontent(row)
    that = row['content']
    case that
    when /^TAF(( [A-Z]{4})+)$/
      row['content'] = $1
    end
  end

  # Applies every column fix to row, in place.
  def fix(row)
    fixdate row
    fixrth row
    fixcentre row
    fixcountry row
    fixtimegroup row
    fixcontent row
  end

  # Inserts all rows of queue inside a single transaction. When stderr is a
  # terminal, the ttaaii/cccc of each row is shown as a progress indicator
  # and "*" is printed once the batch is done. A row that cannot be inserted
  # is dumped with p before the error is re-raised.
  def putdb_core(queue)
    return if queue.empty?
    # $stderr replaces $deferr, which no longer exists in current Ruby.
    progress = $stderr.tty?
    @db.transaction do
      queue.each do |row|
        begin
          @db.execute(INSERT_SQL, row)
          $stderr.printf(" %6s %4s\r", row['ttaaii'], row['cccc']) if progress
        rescue StandardError
          p row
          raise
        end
      end
      $stderr.printf("*\n") if progress
    end
  end

  # Reads file through ParseVolc1.read, fixes each row and writes the rows
  # to the database in batches of @count rows.
  def putdb(file)
    queue = []
    ParseVolc1.read(file) do |row|
      fix(row)
      queue.push(row)
      # ">=" so that a batch holds exactly @count rows
      if queue.size >= @count
        putdb_core(queue)
        queue = []
      end
    end
    putdb_core(queue)
  end

  # Loads one file; entry point used by the command-line driver.
  def run(file)
    putdb file
  end

  private

  # Expands the item list of "HOURLY (...)": each item is either "hh" or an
  # inclusive range "hh-hh". Returns the distinct hours, sorted and joined
  # with commas. original is only used in the error message.
  def expand_hour_list(list, original)
    hours = []
    list.split(/,/).each do |fragm|
      case fragm
      when /^\d\d$/
        hours << fragm
      when /^(\d\d)-(\d\d)$/
        (($1.to_i)..($2.to_i)).each { |ih| hours << sprintf("%02u", ih) }
      else
        raise "timegroup=<#{original}>: unknown time(#{fragm})"
      end
    end
    hours.uniq.sort.join(',')
  end
end

app = Loader.new
begin
  # leading options; the first argument not starting with "-" ends them
  while /^-/ =~ ARGV.first
    case ARGV.shift
    when /^-c(\d+)/ then app.count = $1.to_i
    when /^-k/ then app.keep = true
    else
      puts "usage: #{$0} [-k] [-c3000] volc_files ..."
      exit 1
    end
  end
  app.mkdb
  ARGV.each { |file| app.run(file) }
ensure
  # also reached on "exit 1" and on errors, so the handle is always released
  app.close
end