# ================================================================= #
# 日本語文型パターン照合器                                          #
# Original Version: tokuhisa                                        #
#      Web Version: Keichiro Katayama                               #
# ----------------------------------------------------------------- #
# wjpp.rb Version 3.0                                               #
# 2007/03/14                                                        #
# Main Class for Web                                                #
# ================================================================= #

# Change Log
# 2004/??/?? Version 2.0
#   * wjptcをjppに対応させるために大幅な変更を行った。
#   * xjpp2.0に追従
#
# 2004/06/11 Version 2.1
#   * ppr/spr(matching.cのresult)のフォームでのロードに対応
#   * フォームからパターンの日本語文を入力してのマッチングに対応
#
# 2004/11/05 Version 2.2
#   * 離散記号を変数化(RSN)したxjpp2.1に追従
#   * 関数定義バージョンをwjpp.confに設定して表示できるようにした。
#     (あくまでも手動＆自己申告制です…)
#
# 2004/11/08 Version 2.3
#   * Ａ型変数化した離散記号のまま結果を表示するモードの追加。
#     (離散記号でマッチした部分が分かる)
#   * Syntacticモードでは意味属性の詳細を表示しないように変更。
#   * Don't Show Variety機能の追加(wjpp.rhtml)
#
# 2004/11/09 Version 2.4
#   * Preserve RSN Variables ModeでCoverageを再計算して正しい結果を出せるように
#     PtResult(PtSolution)にメソッド追加
#      --> Preserve RSN Variables ModeではRSNに対してもビットが立つため。
#
# 2005/09/01 Version 2.4.1
#   * libbitree.rb をやめて、Arrayを使うように変更。
#      --> to_a_rev で SystemStackError が出るため。
#
# 2005/11/28 Version 2.4.2
#   * jpp2に対応出来るように、正規表現を変更([^=]+ -> [^=]*)
#     (ただし、変換プログラムで事前に変換する必要あり。)
#   * Windows仕様の改行コード(\r\n)の結果ファイルのロードに対応(chompを追加)
#
# 2007/03/14 Version 3.0
#   * jpp2のサーバプログラム(jpp_server, dRuby使用)を呼び出すように変更
#   * wjptc時代のコードをばっさり削除
#   * その他もろもろを大きく変更


# Load Library
$:.push File.dirname( __FILE__ )

# Ruby/jpp
require 'jpp'

# dRuby
require 'drb/drb'

# CGI Support
require 'cgi'

# Provide String#to_wide through the kakasi extension, but only when strings
# do not already respond to it (for example because another library defined
# the method first).
unless ''.respond_to?( :to_wide )
  require 'kakasi'

  class String
    # Returns the narrow-to-wide converted form of the receiver, as produced
    # by kakasi with the option string below.
    # NOTE(review): the options presumably select EUC output and map ASCII,
    # JIS roman, graphic characters and half-width kana to their wide
    # forms -- confirm against the kakasi documentation.
    def to_wide
      Kakasi.kakasi( '-oeuc -aE -jE -gE -kK', self )
    end
  end
end

# Exception classes living in the JppServer namespace.
# NOTE(review): presumably declared here so that errors coming from the
# remote jpp server can be named and rescued on the client side -- confirm.
class JppServer
  # Common ancestor of the JppServer errors.
  class JppServerError < StandardError
  end

  # JppServerError subclass; judging by the name it reports a failed
  # morphological analysis (nothing in this file raises it).
  class MorphAnalyzeError < JppServerError
  end
end

# Controller of the web front end.
#
# Keeps the input sentence and the selected pattern level, sends the sentence
# to the jpp server(s) through dRuby and stores the resulting
# Jpp::Matching::Result together with a few counters.
class Wjpp
  VERSION = '3.0'
  DEFAULT_CONFIG = 'wjpp.conf'

  # Pattern levels accepted by #pattern_level=. :all queries the :clause,
  # :phrase and :word servers and merges their solutions.
  PATTERN_LEVEL_SET = [ :word, :phrase, :clause, :all ].freeze
  # deleted by k.katayama 2007/05/01
  # DEFAULT_PATTERN_LEVEL = :word
  # added by k.katayama 2007/05/01
  DEFAULT_PATTERN_LEVEL = :all

  # Base class of the errors raised by Wjpp itself.
  class WjppError < StandardError; end
  # Raised by #check_sentence when the sentence exceeds Config#sentence_limit.
  class SentenceTooLong < WjppError; end

  # Settings of Wjpp: built-in defaults that a configuration file may override.
  class Config
    # config:: path of the configuration file. The file is Ruby source that
    #          is evaluated inside this method, so it overrides a default by
    #          assigning the matching instance variable
    #          (for example "@result_limit = 100").
    def initialize( config )
      @jpp_version = 'unknown'

      # dRuby URI of the jpp server for each pattern level
      # (:all has no server of its own).
      @jpp_srv_uri = { :word => 'druby://localhost:52577', :phrase => 'druby://localhost:52578', :clause => 'druby://localhost:52579', :all => nil }

      @pattern_db_dir = '.'

      @client_js = 'libwjptc.js'
      @client_style = Hash::new

      # 0 (or less) disables the corresponding limit.
      @result_limit = 0
      @sentence_limit = 0

      # SECURITY: the configuration file is executed as Ruby code with the
      # privileges of this process. It must never be writable by, or derived
      # from, untrusted input.
      source = File.read( config )
      # Object#untaint no longer exists on current Rubies; call it only where
      # it is still available (older Rubies use it to mark the text trusted).
      source.untaint if source.respond_to?( :untaint )
      eval( source )
    end
    attr_accessor :jpp_version
    attr_accessor :jpp_srv_uri

    attr_accessor :pattern_db_dir

    attr_accessor :client_js
    attr_accessor :client_style

    attr_accessor :result_limit
    attr_accessor :sentence_limit
  end

  # cgi::    CGI object of the current request (stored; not otherwise used in
  #          this file).
  # config:: path of the configuration file, see Wjpp::Config.
  #
  # Also hands the configured pattern database directory to
  # Jpp::Matching::Solution.
  def initialize( cgi = CGI.new, config = DEFAULT_CONFIG )
    # Load Configuration File
    @config = Wjpp::Config.new( config )

    # CGI Support Library
    @cgi = cgi

    # Set Parameters
    Jpp::Matching::Solution.set_pattern_db_dir( @config.pattern_db_dir )

    # Default Values
    @pattern_level = DEFAULT_PATTERN_LEVEL

    @sentence = ''
    @result = nil

    @rsn_show_vara = false

    @pattern_num = 0

    @exceed_limit = false
    @real_solution_num = 0
  end
  attr_accessor :config

  # Result of the last #do_matching (nil when nothing was matched).
  attr_reader :result

  # Sum of pattern_num reported by every server queried by the last
  # #do_matching.
  attr_reader :pattern_num

  # Number of solutions found by the last #do_matching before
  # Config#result_limit was applied.
  attr_reader :real_solution_num

  # True when the last #do_matching dropped solutions because of
  # Config#result_limit.
  def exceed_limit?
    @exceed_limit == true
  end

  # -------------------
  # Sentence
  # -------------------

  # The Japanese sentence to match. #check_sentence replaces it with its
  # normalized form.
  attr_accessor :sentence

  # Validates and normalizes the current sentence.
  #
  # Returns false when the sentence is nil (it is then reset to "") or empty.
  # Otherwise the full-width comma and full stop are replaced by their
  # Japanese counterparts, a closing full stop is appended unless the
  # sentence already ends with a full stop or a full-width question mark,
  # the text is converted with String#to_wide, and true is returned.
  #
  # Raises Wjpp::SentenceTooLong when Config#sentence_limit is positive and
  # the normalized sentence exceeds it.
  def check_sentence
    if @sentence.nil?
      @sentence = ""
      return false
    end
    return false if @sentence.empty?

    # Normalize on a copy so that the string handed in by the caller is
    # neither modified nor required to be unfrozen.
    normalized = @sentence.gsub( /，/, '、' ).gsub( /．/, '。' )
    normalized << "。" unless normalized =~ /[。？]\z/

    # Check Length
    # NOTE(review): dividing the length by 2 presumably assumes that
    # String#length counts bytes and every wide character takes two of
    # them -- confirm for the Ruby version and encoding in use.
    @sentence = normalized.to_wide
    if @config.sentence_limit > 0 && @sentence.length / 2 > @config.sentence_limit
      raise Wjpp::SentenceTooLong, "Length = #{@sentence.length/2}"
    end

    true
  end

  # -------------------
  # Pattern Level
  # -------------------

  # Selects the pattern level; a value outside PATTERN_LEVEL_SET selects
  # DEFAULT_PATTERN_LEVEL instead.
  def pattern_level=( level )
    @pattern_level = PATTERN_LEVEL_SET.include?( level ) ? level : DEFAULT_PATTERN_LEVEL
  end
  attr_reader :pattern_level

  # Capitalized name of the current pattern level, e.g. "Word".
  def pattern_level_name
    @pattern_level.to_s.capitalize
  end

  # -------------------
  # Main
  # -------------------

  # Matches the current sentence against the patterns of the selected level.
  #
  # Returns nil when #check_sentence rejects the sentence, otherwise the
  # Jpp::Matching::Result (also available through #result). When
  # Config#result_limit is positive the solution list is cut down to that
  # many entries; #exceed_limit? and #real_solution_num describe the cut.
  #
  # Raises Wjpp::SentenceTooLong (from #check_sentence) and Wjpp::WjppError
  # when the pattern level is not a known one.
  def do_matching
    unless check_sentence
      return @result = nil
    end

    @result = nil
    @pattern_num = 0
    case @pattern_level
    when :word, :phrase, :clause
      @result = do_matching_sub( @pattern_level )
    when :all
      do_matching_all
    else
      raise WjppError, "Unknown pattern level: #{@pattern_level.inspect}"
    end

    @real_solution_num = @result.solutions.length
    @exceed_limit = false
    if @config.result_limit > 0 && @real_solution_num > @config.result_limit
      @result.solutions.slice!( @config.result_limit, @real_solution_num - @config.result_limit )
      @exceed_limit = true
    end

    @result
  end

  private

  # Queries the clause, phrase and word servers in that order and merges
  # their solutions, sorted in descending order, into @result. @result is the
  # first result object obtained; only its solution list is replaced.
  def do_matching_all
    solutions = Array.new
    [ :clause, :phrase, :word ].each{ |level|
      level_result = do_matching_sub( level )
      next if level_result.nil?
      # concat instead of push(*array): same outcome without expanding a
      # potentially long list into method arguments.
      solutions.concat( level_result.solutions )
      @result = level_result if @result.nil?
    }
    if @result.nil?
      @result = Jpp::Matching::Result.new( "", true )
    end
    solutions.sort!
    solutions.reverse!
    # Inside the block "solutions" is still the local array built above.
    @result.instance_eval{
      @solutions.replace( solutions )
    }
  end

  # Sends the current sentence to the jpp server of one pattern level
  # (:word, :phrase or :clause), adds the server's pattern count to
  # @pattern_num and returns the parsed Jpp::Matching::Result.
  #
  # Raises Wjpp::WjppError for any other level.
  def do_matching_sub( level )
    unless [ :word, :phrase, :clause ].include?( level )
      raise WjppError, "Unknown pattern level: #{level.inspect}"
    end
    js_uri = @config.jpp_srv_uri[ level ]

    jpp_server = DRbObject.new_with_uri( js_uri )
    raw_result = jpp_server.matching( @sentence )
    result = Jpp::Matching::Result.new( raw_result.split(/\n/), true )
    @pattern_num += jpp_server.pattern_num

    result
  end
end

# ------------------------------------------------------------------

# =====================
# Sub
# =====================
# Formats a number with a comma between every group of three integer digits,
# e.g. 1234567 -> "1,234,567".
#
# num:: anything whose to_s starts with an optional sign followed by digits.
#
# Only the leading run of integer digits is grouped. A sign, a fractional
# part or an exponent is copied unchanged, so -123 gives "-123" and 1234.5
# gives "1,234.5". Text that does not start with digits is returned as is.
# Always returns a new String.
def num2comstr(num)
  num.to_s.sub( /\A([-+]?)(\d+)/ ) {
    # Take the captures first: the scan below overwrites $1 and $2.
    sign = $1
    digits = $2
    # Group from the right: reverse, cut into chunks of up to three digits,
    # join the chunks with commas and reverse back.
    sign + digits.reverse.scan( /\d{1,3}/ ).join( ',' ).reverse
  }
end


# Web-rendering additions to Jpp::Matching::Solution. The class is reopened
# here, so Jpp::Matching must already be defined when this code is loaded.
# The instance variables @var_list, @match_bit_morph and @pattern_id are not
# assigned in this file; they are expected to be provided by the class itself.
class Jpp::Matching::Solution
  class << self
    # Sets the root directory of the pattern database read by
    # #load_from_database. The value lives in a class variable and therefore
    # applies to every solution.
    def set_pattern_db_dir( path )
      @@pattern_db_dir = path
    end
  end

  # Renders this solution as HTML fragments.
  #
  # morph_obj:: indexable list of morphemes; it must respond to +length+,
  #             +[]+ and +values_at+, and every element must respond to
  #             +word+.
  #
  # Returns four strings, in this order:
  # 1. table cells with the surface text of the morphemes,
  # 2. table cells with the name of the variable bound to each cell (empty
  #    for cells that are not bound to a variable),
  # 3. the Japanese pattern body with its variables wrapped in span tags,
  #    followed by an attribute list when variables carry attributes,
  # 4. the English pattern body with its variables wrapped in span tags.
  #
  # The pattern text is re-read from the pattern database on every call.
  def make_html_with_morph( morph_obj )
    load_from_database

    # Map every variable that matched at least one morpheme to the list of
    # morpheme indexes it covers.
    # NOTE(review): num_of_training_zero / num_of_continuous_bits1_from_ntz
    # are defined elsewhere; presumably they return the position of the
    # lowest set bit and the length of the run of set bits starting there.
    var_idx2var_info = Hash.new
    var_idx_list = Array.new
    @var_list.each{ |info|
      next unless info[:bit_morph] > 0
      var_idx_begin = info[:bit_morph].num_of_training_zero
      var_idx_end = var_idx_begin + info[:bit_morph].num_of_continuous_bits1_from_ntz
      var_idx = (var_idx_begin...var_idx_end).to_a
      var_idx_list.push var_idx
      var_idx2var_info[var_idx] = info
    }

    # Make Morph Line
    i = 0

    html_morph_line = ""
    html_var_line = ""
    morph_len = morph_obj.length
    while i < morph_len
      # An index list whose first element is i means a variable starts here.
      var_idx = var_idx_list.assoc(i)
      if var_idx.nil?
        # Plain morpheme: class "spl" when its bit is set in
        # @match_bit_morph, "els" otherwise.
        td_class = ( @match_bit_morph & ( 1 << i ) ) == 0 ? "els" : "spl"
        html_morph_line << '<td class="%s">%s</td>' % [ td_class, CGI.escapeHTML( morph_obj[i].word ) ]
        html_var_line   << '<td class="%s"></td>' % td_class
        i += 1
      else
        length = var_idx.length
        var_info = var_idx2var_info[var_idx]
        var_id = var_info[:var_id]
        # NOTE(review): a variable covering exactly two morphemes gets no
        # colspan here; confirm whether "length > 1" was intended.
        if length > 2
          tmp = '<td class="var" colspan="%d">' % length
          html_morph_line << tmp
          html_var_line << tmp
        else
          html_morph_line << '<td class="var">'
          html_var_line << '<td class="var">'
        end
        span_tag = '<span class="wjpp%d-%d" style="cursor: pointer;" onMouseOver="ChangeStyle( \'wjpp%d\', \'%d\', 1 );" onMouseOut="ChangeStyle( \'wjpp%d\', \'%d\', 0 );">' % [ self.object_id, var_id, self.object_id, var_id, self.object_id, var_id ]
        html_morph_line << span_tag
        html_var_line << span_tag

        # The morphemes covered by the variable are shown as one joined text.
        html_morph_line << CGI.escapeHTML( morph_obj.values_at(*var_idx).collect{|mi| mi.word}.join('') )
        html_var_line << CGI.escapeHTML( var_info[:name] )

        html_morph_line << '</span></td>'
        html_var_line << '</span></td>'
        # Skip every morpheme the variable covers.
        i += length
      end
    end

    # Make Ja-Pattern Line
    # Every variable (name + number) is wrapped in a span. An attribute
    # suffix in parentheses is removed from the line and collected in
    # attr_map so that it can be listed below the pattern.
    attr_map = Hash.new
    html_pbody_ja_line = CGI.escapeHTML(@pbody_ja).gsub( /(N|ND|TIME|NUM|AJ|AJV|ADV|REN|GEN|V|NP|VP|AJP|AJVP|ADVP|CL|ANY)(\d+)(?:\(((?:N[IYK]|KR|I[MY]):[^\)]+)\))?/ ) {
      var_name = $1
      var_id = $2
      attr_text = $3
      var_info = var_name + var_id
      unless attr_text.nil?
        attr_map[ var_info ] = attr_text
      end
      '<span class="wjpp%d-%s" style="cursor: pointer;" onMouseOver="ChangeStyle( \'wjpp%d\', \'%s\', 1 );" onMouseOut="ChangeStyle( \'wjpp%d\', \'%s\', 0 );">%s</span>' % [ self.object_id, var_id, self.object_id, var_id, self.object_id, var_id, CGI.escapeHTML(var_info) ]
    }.gsub( %r((/[ytcfk]+)), '<span class="risan">\1</span>' )

    unless attr_map.empty?
      html_pbody_ja_line << '<br>'
      html_pbody_ja_line << '<span class="attribute">'
      attr_map.keys.sort.each{ |var|
        attr_text = attr_map[var]
        var =~ /\A[A-Z]+(\d+)\z/
        var_id = $1
        html_pbody_ja_line << '<span class="value"><span class="wjpp%d-%s">%s</span></span>' % [ self.object_id, var_id, var ]
        html_pbody_ja_line << '('
        # attr_text was captured from text that is already HTML-escaped;
        # escaping it a second time would turn "&amp;" into "&amp;amp;".
        html_pbody_ja_line << attr_text.split(/,/).join(" ")
        html_pbody_ja_line << ') '
      }
      html_pbody_ja_line.strip!
      html_pbody_ja_line << '</span>'
    end

    # Make En-Pattern Line
    html_pbody_en_line = CGI.escapeHTML(@pbody_en).gsub( /(N|ND|TIME|NUM|AJ|AJV|ADV|REN|GEN|V|NP|VP|AJP|AJVP|ADVP|CL|ANY|DEF)(\d+)/ ) {
      var_name = $1
      var_id = $2
      var_info = var_name + var_id
      '<span class="wjpp%d-%s" style="cursor: pointer;" onMouseOver="ChangeStyle( \'wjpp%d\', \'%s\', 1 );" onMouseOut="ChangeStyle( \'wjpp%d\', \'%s\', 0 );">%s</span>' % [ self.object_id, var_id, self.object_id, var_id, self.object_id, var_id, CGI.escapeHTML(var_info) ]
    }

    return html_morph_line, html_var_line, html_pbody_ja_line, html_pbody_en_line
  end

  # Example sentences of the pattern (Japanese / English); set by
  # #load_from_database, i.e. available after #make_html_with_morph.
  attr_reader :sent_ja
  attr_reader :sent_en

  private

  # Reads the pattern file that belongs to @pattern_id and fills @sent_ja,
  # @sent_en, @pbody_ja and @pbody_en.
  #
  # The ID consists of an optional level prefix ("WJ", "PJ" or "CJ"), two
  # capital letters, three digits and a remainder that starts with a digit.
  # The file is <pattern_db_dir>/<letters>/<digits>/<remainder>.txt. Counting
  # lines from 0, lines 1 and 2 hold the example sentences and the pattern
  # bodies are on lines 3-4 (level W or no prefix), 5-6 (P) or 7-8 (C).
  #
  # Raises ArgumentError when @pattern_id does not have that form; errors
  # from reading the file are passed on to the caller.
  def load_from_database
    # NOTE(review): the remainder is matched with ".*" and may therefore
    # contain path separators or ".."; if pattern IDs can ever come from
    # untrusted input this allows reading files outside the pattern
    # directory.
    m = /^(?:([WPC])J)?([A-Z][A-Z])(\d\d\d)(\d+.*)$/.match( @pattern_id )
    if m.nil?
      raise ArgumentError, "Unexpected pattern ID: #{@pattern_id.inspect}"
    end
    level = m[1]
    pid_1 = m[2]
    pid_2 = m[3]
    pid_3 = m[4]

    line_base = case level
                when 'P' then 5
                when 'C' then 7
                else 3
                end

    fname = File.join( @@pattern_db_dir, pid_1, pid_2, pid_3 + '.txt' )
    # Object#untaint no longer exists on current Rubies; call it only where
    # it is still available.
    fname.untaint if fname.respond_to?( :untaint )
    data = File.readlines( fname )

    @sent_ja = data[1].chomp
    @sent_en = data[2].chomp

    @pbody_ja = data[line_base  ].chomp
    @pbody_en = data[line_base+1].chomp

    # Clean OR-tag
    # "(OR:x, rest" keeps "(rest"; an OR-tag that fills the whole
    # parenthesis is removed completely.
    @pbody_ja.gsub!( /\(OR:[^,\)]+,\s*/, '(' )
    @pbody_ja.gsub!( /\(OR:[^\)]+\)/, '' )
    @pbody_en.gsub!( /\(OR:[^\)]+\)/, '' )
  end
end

