#!/usr/bin/ruby -Ke

# ===============================
# Jpp::Matching Library
# Version 1.5
# 2006/09/19
# Keichiro Katayama
# ===============================

# Change Logs
# Version 1.0 (2004/12/15)
#   First Release
# Version 1.1 (2004/12/31)
#   Change Timing of Recalculation of Coverage
# Version 1.2 (2005/01/05)
#   Add Configuration Class
# Version 1.3 (2005/01/08)
#   Add Jpp::Matching::Result.load( filename )
# Version 1.4 (2006/09/14)
#   Change for Jpp2
# Version 1.5 (2006/09/19)
#   Using pat2map.rb

module Jpp
class Matching

  # =============================
  # Class Methods
  # =============================
  class << self

    # Matching( Sentence <=> Pattern )
    #
    # Runs morphological analysis on +sentence+ and hands the path of the
    # analysis result to matching2 together with +pattern+ and +config+.
    #
    # Returns whatever matching2 returns.
    # Raises MorphAnalyzeError when Jpp::Morph.analyze_internal returns nil.
    def matching( sentence, pattern, config = Jpp::Matching::Configuration.new )
      analyzed = Jpp::Morph.analyze_internal( sentence )
      raise MorphAnalyzeError if analyzed.nil?

      matching2( analyzed.path, pattern, config )
    end

    # Matching( Morph Data File <=> Pattern ), using the external pattern
    # compiler (Jpp.pattern_cmd) to build the pattern map file.
    #
    # morph_data_filename:: path of the morph data file passed on to matching3
    # pattern:: an Array writes one "P<i>:<item>" line per element;
    #           anything else is written as a single "P0:<pattern>" line
    # config:: configuration object; its map_pattern is overwritten with the
    #          path of the generated (temporary) map file
    #
    # Returns whatever matching3 returns.  Both temporary files are removed
    # before returning, so config.map_pattern no longer exists afterwards.
    def matching2_jpp2_pattern( morph_data_filename, pattern, config = Jpp::Matching::Configuration.new )
      ptn_fp = nil
      map_fp = nil

      # Create Pattern File
      ptn_fp = Tempfile.new( 'jppmatching' )
      case pattern
      when Array
        pattern.each_with_index { |item, i|
          ptn_fp.puts "P#{i}:#{item.to_s}"
        }
      else
        ptn_fp.puts "P0:#{pattern}"
      end
      ptn_fp.close( false )

      # Create Map Filename (the file is only reserved here; the command fills it)
      map_fp = Tempfile.new( 'jppmatching' )
      map_fp.close( false )

      # Make Map File
      #  arg1 = sentence pattern file name   (in)
      #  arg2 = pattern map file name        (out)
      #  arg3 = function map file name       (in)
      #  arg4 = semantic attribute code file (in)
      #  arg5 = thesaurus (ruigo.dic)        (in)
      # Only arg1 and arg2 are passed below.
      cmd_line = [ Jpp.pattern_cmd, ptn_fp.path, map_fp.path, "> /dev/null 2>&1" ].join( " " )
      Open3.popen3( cmd_line ) { |io_in, io_out, io_err|
        # Nothing is written to the child, so close its stdin right away
        # (same as matching3) and drain both pipes until EOF.
        io_in.close
        io_out.readlines
        io_err.readlines
      }

      # Matching
      config.map_pattern = map_fp.path
      matching3( morph_data_filename, config )
    ensure
      # Remove the temporary files even when the command or matching3 raises,
      # consistent with matching2.
      map_fp.close( true ) if map_fp
      ptn_fp.close( true ) if ptn_fp
    end

    # Matching( Morph Data File <=> Pattern )
    # use pat2map.rb(embed)
    #
    # Writes +pattern+ as a single "P0:<pattern>" line, converts it to a map
    # file with Pat2Map::Converter and runs matching3 on the result.
    #
    # morph_data_filename:: path of the morph data file passed on to matching3
    # pattern:: pattern body (interpolated into the "P0:" line)
    # config:: configuration object; its map_pattern is overwritten with the
    #          path of the generated (temporary) map file
    #
    # Returns whatever matching3 returns.  Both temporary files are removed
    # before returning (also when an error is raised).
    #
    # When the pattern cannot be parsed, a diagnostic is written to $stderr
    # and the process exits.
    # NOTE(review): `exit` without an argument terminates with a success
    # status even though this is an error path -- confirm whether callers
    # rely on that before changing it.
    def matching2( morph_data_filename, pattern, config = Jpp::Matching::Configuration.new )
      ptn_fp = nil
      map_fp = nil

      # Create Pattern File
      ptn_fp = Tempfile.new( 'jppmatching' )
      line = "P0:#{pattern}"
      ptn_fp.puts line
      ptn_fp.close( false )

      # Create Map File
      map_fp = Tempfile.new( 'jppmatching' )

      # Make Map File (Pat2map)
      p2m = Pat2Map::Converter.new( nil, false )

      begin
        p2m.parse( line, 0 )
      rescue Jpp::PatternParseError => e
        buffer = [ "", "----" ]
        if e.message =~ /^(.*), pos=(-?\d+)$/ then
          # verbose mode
          msg = $1
          # The pattern above admits a negative position; String#* rejects a
          # negative count, so clamp it to keep the diagnostic printable.
          pos = [ $2.to_i, 0 ].max
          pid, pbody = line.chomp.split( /:/, 2 )
          buffer << "Pattern Parse Error: #{msg}"
          buffer << "----> #{pid}:#{pbody}"
          buffer << "      " + " " * pid.length + " " + " " * pos + "^"
        else
          buffer << "Pattern Parse Error!(#{e.message})"
          buffer << " --> #{line.chomp}"
        end

        $stderr.puts buffer.join("\n")
        exit
      rescue
        $stderr.puts ""
        $stderr.puts "----"
        $stderr.puts "Unknown Error!"
        $stderr.puts " --> #{line.chomp}"
        exit
      end

      p2m.dump( map_fp, ptn_fp.path )
      p2m.close
      map_fp.close( false )

      # Matching
      config.map_pattern = map_fp.path
      matching3( morph_data_filename, config )
    ensure
      # Always remove the temporary files, including on exit / exceptions.
      map_fp.close( true ) if map_fp
      ptn_fp.close( true ) if ptn_fp
    end

    # Matching( Morph Data file <=> Pattern Map File)
    #
    # Runs the external matcher (Jpp.matching_cmd) and parses its standard
    # output.
    #
    # morph_data_filename:: path of the morph data file
    # config:: object providing map_pattern, map_stype, map_atype, jpp_rc
    #          and reject_rsn
    #
    # Returns a Matching::Result, or nil when the output contains no line
    # starting with "INPUT=".
    def matching3( morph_data_filename, config )
      # File checks are deliberately bypassed; the disabled code was:
      #   raise ArgumentError, "Cannot be found Morph Data File" unless File.exist?( morph_data_filename )
      #   raise MatchingConfigurationError, "Some files isn't found!" unless config.valid?

      # Do Matching
      #  arg1 = sentence pattern map file name
      #  arg2 = morphological analysis file name
      #  arg3 = S function map file name
      #  arg4 = A function map file name
      #  arg5 = RC file name
      command = [
        Jpp.matching_cmd,
        config.map_pattern,
        morph_data_filename,
        config.map_stype,
        config.map_atype,
        config.jpp_rc,
        "2> /dev/null"
      ].join( " " )

      output_lines = nil
      Open3.popen3( command ) { |child_in, child_out, child_err|
        child_in.close
        output_lines = child_out.readlines
        child_err.readlines # drained only; the content is not used
      }

      # Drop everything that precedes the "INPUT=" header line
      output_lines.shift until output_lines.empty? || output_lines.first =~ /^INPUT=/

      # No Result
      return nil if output_lines.empty?

      # Cleaning
      output_lines.collect! { |raw| raw.chomp }

      # Create Matching-Result Object
      Matching::Result.new( output_lines, config.reject_rsn )
    end

  end # end of class methods

  # Matching Result Parser
  #
  # Parses matcher output of the following shape (one element per line):
  #   1:    INPUT=<input_filename>=<input_bit_char hex>=<input_bit_morph hex>
  #   2:    <input sentence>                (skipped; rebuilt from morph data)
  #   3...: <morph data lines>              (up to the first Jpp::SEPARATER)
  #   n:    Jpp::SEPARATER
  #   n+1.: solution records, each starting with "PATTERN=" and optionally
  #         terminated by Jpp::SEPARATER
  class Result

    # =============================
    # Class Methods
    # =============================
    class << self

      # Load Result from File
      #
      # Raises ArgumentError when +filename+ does not exist.
      def load( filename, reject_rsn = false )
        unless File.exist?( filename )
          raise ArgumentError, "File not found(#{filename})"
        end
        lines = File.readlines( filename ).collect { |raw| raw.chomp }
        Jpp::Matching::Result.new( lines, reject_rsn )
      end

    end

    attr_reader :input_filename
    attr_reader :input_bit_char
    attr_reader :input_bit_morph
    attr_reader :input_sentence
    attr_reader :morph_data
    attr_reader :solutions

    # result_array_org:: chomped output lines (not modified; a copy is consumed)
    # reject_rsn:: passed through to every Matching::Solution
    #
    # Raises ArgumentError when the first line is not an "INPUT" header.
    def initialize( result_array_org, reject_rsn )
      # Init Instance Vars
      @input_filename = nil
      @input_bit_char = nil
      @input_bit_morph = nil
      @input_sentence = nil
      @morph_data = nil
      @solutions = Array.new

      # Nothing to parse
      return if result_array_org.empty?

      rest = result_array_org.dup

      # 1: INPUT=[@input_filename]=[@input_bit_char]=[@input_bit_morph]
      tag, @input_filename, @input_bit_char, @input_bit_morph = rest.shift.split( /=/, 4 )
      raise ArgumentError, "param isn't matching result file!" unless tag == 'INPUT'

      # Convert HEX(String) -> Integer
      @input_bit_char = @input_bit_char.hex unless @input_bit_char.nil?
      @input_bit_morph = @input_bit_morph.hex unless @input_bit_morph.nil?

      # 2: <Input Sentence>
      rest.shift # skip

      # 3...: <Morph Data> -- everything in front of the first separator
      separator_at = rest.index( Jpp::SEPARATER )
      morph_lines = rest.slice!( 0...separator_at )
      @morph_data = Morph.load_morph_result( morph_lines )

      # Rebuild Input Sentence
      @input_sentence = @morph_data.collect { |mitem| mitem.word }.join( '' )

      # Recalc bit masks that are missing or zero in the header
      if @input_bit_morph.nil? || @input_bit_morph == 0
        @input_bit_morph = @morph_data.length.to_i.fill_bits1
      end
      if @input_bit_char.nil? || @input_bit_char == 0
        @input_bit_char = @input_sentence.split(//).length.to_i.fill_bits1
      end

      # 4: ----- (Separater)
      rest.shift # skip

      # 5: Results -- split the remaining lines into one group per solution.
      # A group ends at a separator (consumed), at the end of input, or right
      # before the next line starting with 'PATTERN='.
      groups = Array.new # may contain duplicated results
      current = Array.new
      until rest.empty?
        current << rest.shift
        upcoming = rest.first
        if upcoming == Jpp::SEPARATER
          rest.shift
          group_done = true
        elsif rest.empty?
          group_done = true
        else
          group_done = ( upcoming.index( 'PATTERN=' ) == 0 )
        end
        if group_done
          groups << current
          current = Array.new
        end
      end
      raise ArgumentError, "Illegall result format!" unless current.empty?

      groups.uniq!
      @solutions = groups.collect { |group| Matching::Solution.new( group, @input_bit_char, reject_rsn ) }

      # Descending order
      @solutions.sort!
      @solutions.reverse!
    end # Result.new

  end # class Result

  # A single pattern match ("solution") parsed from one record of the
  # matcher output.
  #
  # Record layout (fields separated by "="):
  #   PATTERN=<pattern_id>=<pattern_body>=[<route>]=<match_bit_char hex>=<match_bit_morph hex>
  # followed by zero or more variable lines:
  #   <NAME><n>=<standard form>=<conjugation>=<word>=<route>=<char bits hex>=<morph bits hex>
  # (variable lines with 5 fields omit the conjugation; with 2 fields only
  # the standard form and the word are given).
  class Solution
    include Comparable

    attr_reader :pattern_id
    attr_reader :pattern_body
    attr_reader :route
    attr_reader :match_bit_char
    attr_reader :match_bit_morph
    attr_reader :var_list
    attr_reader :memory_list
#    attr_reader :movable_list
#    attr_reader :morph_list
    attr_reader :yousou_list
    attr_reader :jidura_list
    attr_reader :coverage

    # solution_array_org:: lines of one solution record (not modified)
    # input_bit_char:: bit mask of the input characters, used as the
    #                  denominator of the coverage; may be nil, in which
    #                  case coverage stays at -1.0
    # reject_rsn:: when true, the bits matched by variables whose name starts
    #              with "RSN" are removed from match_bit_char and
    #              match_bit_morph (and coverage is recalculated), and those
    #              variables are stored with bit_char = bit_morph = 0
    #
    # Raises ArgumentError when the first line is not a "PATTERN" header or
    # a variable line has an unexpected number of fields.
    def initialize( solution_array_org, input_bit_char = nil, reject_rsn = false )
      # Init Instance Vars
      @pattern_id = nil
      @pattern_body = nil
      @route = Array.new
      @match_bit_char = nil
      @match_bit_morph = nil
      @memory_list = Hash.new
#      @movable_list = Hash.new
#      @morph_list = Hash.new
      @var_list = Array.new
      @yousou_list = Array.new
      @jidura_list = Array.new

      @coverage = -1.0

      return if solution_array_org.empty?

      solution_array = solution_array_org.dup

      # 1: PATTERN=[@pattern_id]=[@pattern_body]=["[" + @route + "]"]=[@match_bit_char]=[@match_bit_morph]
      temp, @pattern_id, @pattern_body, route_info, @match_bit_char, @match_bit_morph = solution_array.shift.split( /=/, 6 )
      if temp != 'PATTERN' then
        raise ArgumentError, "param isn't matching result file!"
      end

      # Convert route_info into Array
      unless route_info.nil?
        @route = route_info.sub( /^\[([^\]]+)\]/, '\1' ).split( /,/ )
      end

      # Convert HEX(String) -> Integer
      unless @match_bit_char.nil?
        @match_bit_char = @match_bit_char.hex
        # Calculate Coverage (no-op while input_bit_char is nil)
        calculate_coverage( input_bit_char )
      end
      unless @match_bit_morph.nil?
        @match_bit_morph = @match_bit_morph.hex
      end

      # Parse variable lines; collect the bits of rejected RSN variables
      mask_bit_morph = 0
      mask_bit_char = 0
      solution_array.each{ |line|
        case line
        when /^([A-Z_]+\d+)=(.*)$/  # Variable
          var_name = $1
          var_info = $2

          # Jpp2 format:
          # name=standard form=conjugation=word=route=char position bits=morph position bits
          hyoujun = word = route_info = bit_char = bit_morph = nil
          fields = var_info.split( /=/, 6 )
          case fields.length
          when 6
            # the conjugation (index 1) is parsed but not stored
            hyoujun, word, route_info, bit_char, bit_morph = fields.values_at( 0, 2, 3, 4, 5 )
          when 5
            hyoujun, word, route_info, bit_char, bit_morph = fields
          when 2
            hyoujun, word = fields
          else
            raise ArgumentError, "Unknown Var Info(#{var_info})"
          end

          bit_char = bit_char.hex unless bit_char.nil?
          bit_morph = bit_morph.hex unless bit_morph.nil?

          # Remove RSN Matching for Coverage
          if reject_rsn && var_name.slice( 0, 3 ) == 'RSN' then
            mask_bit_char |= bit_char unless bit_char.nil?
            mask_bit_morph |= bit_morph unless bit_morph.nil?
            bit_char = 0
            bit_morph = 0
          end

          # NOTE(review): names containing "_" are accepted by the pattern
          # above but not by this stricter one, so they get var_id 0 --
          # confirm that this is intended.
          id_match = /\A[A-Z]+(\d+)\z/.match( var_name )
          var_id = id_match ? id_match[1].to_i : 0

          @var_list << { :name => var_name, :hyoujun => hyoujun, :word => word, :route => route_info, :bit_char => bit_char, :bit_morph => bit_morph, :var_id => var_id }

        # Handlers for the following record types are disabled, so
        # @memory_list, @yousou_list and @jidura_list are never filled here:
        #   /^#(\d+)=(.*)$/           "#n" memory      : route=bit_char=bit_morph
        #   /^\$(\d+)=(.*)$/          "$n" movable     : integer
        #   /^mrp(\d+)=(.*)$/         semantic-attribute constraint
        #   /^\.([a-z_]+)=(.*)$/      yousou function  : byte_pos=hyoujun=katuyou=word=route=bit_char=bit_morph
        #   /^\j\(([^\)=]+)\)=(.*)$/  jidura (literal) : byte_pos=hyoujun=katuyou=word=route=bit_char=bit_morph
        # Lines of any other shape are ignored.
        end # end of case
      }

      # ReCalculate Coverage
      unless @match_bit_char.nil?
        if input_bit_char.nil? then
          # No input mask available (default argument): only strip the
          # rejected RSN bits instead of failing on `Integer & nil`.
          @match_bit_char &= ~mask_bit_char
        else
          # With an empty mask this reduces to @match_bit_char & input_bit_char
          @match_bit_char &= ( input_bit_char ^ mask_bit_char )
        end
        calculate_coverage( input_bit_char )
      end
      unless @match_bit_morph.nil?
        @match_bit_morph &= ~mask_bit_morph
      end

    end # end of Solution.new

    # Sets @coverage to (matched character bits) / (input character bits).
    # Leaves @coverage untouched when either bit mask is nil.
    def calculate_coverage( input_bit_char )
      return if input_bit_char.nil? || @match_bit_char.nil?
      @coverage = @match_bit_char.num_of_bits1.to_f / input_bit_char.num_of_bits1.to_f
    end
    private :calculate_coverage

    # for Comparable
    # Order: coverage ascending; ties are broken by pattern_id in reverse
    # (the solution with the smaller pattern_id compares as greater), so a
    # sort followed by reverse lists the highest coverage first and, within
    # equal coverage, the smallest pattern_id first.
    # Anything that is not a Solution is compared with coverage via to_i.
    def <=>( other )
      if other.kind_of?( Jpp::Matching::Solution ) then
        [ @coverage, other.pattern_id ] <=> [ other.coverage, @pattern_id ]
      else
        @coverage <=> other.to_i
      end
    end

    # 100% match?
    def full_match?()
      @coverage == 1.0
    end

  end

  # Matching Configuration Data Class
  #
  # Holds the file names handed to the external matcher plus the
  # reject_rsn flag.
  class Configuration

    # Accessor
    attr_accessor :map_pattern
    attr_accessor :map_stype
    attr_accessor :map_atype
    attr_accessor :jpp_rc

    # map_pattern:: pattern map file name (nil until one is generated/assigned)
    # map_stype, map_atype, jpp_rc:: file names, defaulting to the Jpp constants
    # reject_rsn_flag:: stored through reject_rsn= (only +true+ enables it)
    def initialize( map_pattern = nil, map_stype = Jpp::DEFAULT_MAP_STYPE, map_atype = Jpp::DEFAULT_MAP_ATYPE, jpp_rc = Jpp::DEFAULT_JPP_RC, reject_rsn_flag = false )
      @map_pattern  = map_pattern
      @map_stype    = map_stype
      @map_atype    = map_atype
      @jpp_rc       = jpp_rc
      self.reject_rsn = reject_rsn_flag
    end

    # Always returns a strict boolean.
    def reject_rsn()
      ( @reject_rsn == true )
    end

    # Only the value +true+ itself turns the flag on; anything else is false.
    def reject_rsn=( new_value )
      @reject_rsn = ( new_value == true )
    end

    # Validation
    #
    # Returns true when all four configured files exist.  A nil entry (the
    # default for map_pattern) makes the configuration invalid instead of
    # raising from File.exist?.
    def valid?()
      [ @map_pattern, @map_stype, @map_atype, @jpp_rc ].all? { |path|
        !path.nil? && File.exist?( path )
      }
    end

  end # Jpp::Matching::Configuration

end # Jpp::Matching

end # Jpp
