#!/usr/bin/ruby -Ke

# ===============================
# Jpp::Morph Library
# Version 1.1
# 2004/12/26
# Keichiro Katayama
# ===============================

# Change Logs...
# Version 1.0(2004/11/07)
#   First Release
# Version 1.1(2004/12/26)
#   Fix for the morph-data parser
#   (a hinshi-code field containing upper-case letters could not be parsed)
#   ->ex: "21. /iVQPjSTR|WR(1A00)" from WJ161754-02

module Jpp

  # An ordered list of Morph::MorphItem objects (one per morpheme line of a
  # morph-analysis result), built on top of Array.
  #
  # Several methods rely on names that are NOT defined in this file and must
  # be provided elsewhere in the project: MorphAnalyzeError, Jpp.morph_cmd,
  # Jpp.ruigo_dic and the Integer helpers fill_bits1, num_of_training_zero
  # and num_of_continuous_bits1_from_ntz.
  class Morph < Array

    # Creates an empty Morph. Unlike Array.new, no arguments are accepted.
    def initialize
      super
    end

    # Returns the text form of every item (MorphItem#to_s), one per line.
    def to_s
      collect{ |mitem| mitem.to_s }.join( "\n" )
    end

    # Generate Morph Analyze Information.
    #
    # Returns an Array of lines:
    #   INPUT=<sentence_id>=<bit_char>=<bit_morph>
    #   <sentence>              (all item words concatenated)
    #   <one line per item>     (MorphItem#to_s)
    #   <length + 1>. /nil      (terminator line)
    #
    # bit_char / bit_morph are produced by Integer#fill_bits1 (defined
    # elsewhere) from the character count and the item count respectively.
    def to_morph_info( sentence_id = 'WJ000000-00-0' )
      # Create Sentence
      sentence = collect{ |mitem| mitem.word }.join( '' )

      # Prepare bit_morph and bit_char
      bit_morph = length.fill_bits1
      bit_char  = sentence.split( // ).length.fill_bits1

      buffer = Array.new
      buffer << sprintf( 'INPUT=%s=0x%x=0x%x', sentence_id, bit_char, bit_morph )
      buffer << sentence
      buffer += collect{ |mitem| mitem.to_s }
      buffer << sprintf( '%d. /nil', length + 1 )

      return buffer
    end

    # Returns the items selected by +bitmap+: start index and length are
    # taken from bitmap.num_of_training_zero and
    # bitmap.num_of_continuous_bits1_from_ntz (both defined elsewhere).
    def slice_by_bits( bitmap )
      slice( bitmap.num_of_training_zero, bitmap.num_of_continuous_bits1_from_ntz )
    end

    # Same selection as #slice_by_bits, but the items are also removed
    # from the receiver.
    def slice_by_bits!( bitmap )
      slice!( bitmap.num_of_training_zero, bitmap.num_of_continuous_bits1_from_ntz )
    end

    # Renumbers all items in place so that item i gets number i + 1.
    def renumber!
      each_index{ |i|
        self[i].number = i + 1
      }
    end

    # Deep-Copy dup method: returns a new Morph whose items are
    # MorphItem#my_dup copies of the receiver's items.
    def my_dup
      new_morph = Jpp::Morph.new
      new_morph.replace( collect{ |mitem| mitem.my_dup } )
      return new_morph
    end

    # Analyzes +sentence+ and returns the parsed Morph, or nil when
    # analyze_internal reports failure (nil).
    #
    # Raises whatever analyze_internal / load_morph_result raise.
    def self.analyze( sentence )
      morph_fp = Morph.analyze_internal( sentence )
      return nil if morph_fp.nil?

      morph_fp.open
      begin
        return Morph.load_morph_result( morph_fp )
      ensure
        # Close the result file even when parsing raises.
        morph_fp.close
      end
    end

    # Morph Analyze (return: IO of result file)
    #
    # Returns nil for an empty sentence. For any other input this currently
    # ALWAYS raises MorphAnalyzeError: running the external analyzer is
    # explicitly unsupported with Jpp2.
    def self.analyze_internal( sentence )
      return nil if sentence.empty?

      # Jpp2
      raise MorphAnalyzeError, "Unsupport morph-analyze with Jpp2!"

      # NOTE(review): everything below is unreachable because of the raise
      # above. It is kept as the legacy external-command path. Re-enabling
      # it needs Tempfile and Open3, which this file does not require.

      # Create Sentence File
      sent_fp = Tempfile.new( 'jppmorph' )
      sent_fp.puts "S0:#{sentence}"
      sent_fp.close( false )

      # Create Morph File
      morph_fp = Tempfile.new( 'jppmorph' )
      morph_fp.close( false )

      # Call MORPH_CMD
      cmd_line = [ Jpp.morph_cmd, sent_fp.path, morph_fp.path, Jpp.ruigo_dic ].join( " " )
      #system( "#{cmd_line} 2> /dev/null > /dev/null" )
      buffer_stdout = buffer_stderr = nil # Init
      Open3.popen3( cmd_line ) { |io_in, io_out, io_err|
        buffer_stdout = io_out.readlines
        buffer_stderr = io_err.readlines
      }

      # Check Result
      if File.size( morph_fp.path ) == 0 then
        # Failed
        $stderr.puts "Morph failed!"
        $stderr.puts buffer_stderr
        return nil
      end

      return morph_fp
    end

    # Create Array(MorphItem...) by morph_result_enum(Enumerable)
    #
    # morph_result_enum must respond to #each and yield String lines (an IO
    # or an Array of Strings). The yielded strings are not modified, so
    # frozen lines are accepted.
    #
    # Line handling:
    #   "<n>. /..." or "<n>. +..."  -> one MorphItem (skipped when the body
    #                                  is exactly "nil", the terminator)
    #   "INPUT=..."                 -> ignored
    #   line starting with two bytes in 0xA1-0xFE -> ignored (presumably the
    #                                  EUC-JP sentence echo; cf. -Ke shebang)
    #   anything else               -> MorphAnalyzeError
    def self.load_morph_result( morph_result_enum )
      morph_result = Morph.new
      morph_result_enum.each{ |raw_line|
        line = raw_line.chomp # non-destructive: leave the caller's data alone
        case line
        when /^(\d+)\. ([\/+])(.*)$/
          if $3 != 'nil' then
            morph_result << Morph::MorphItem.new( line )
          end
        when /^INPUT=/
          next
        when /^[\xA1-\xFE][\xA1-\xFE]/n
          next
        else
          raise MorphAnalyzeError, 'Illegal morph data!(%s)' % line
        end
      }
      return morph_result
    end

    # One morpheme line of a morph-analysis result, e.g.
    #   "21. /word([P]1A00,standard,variant,{imi})"
    #
    # Attributes:
    #   number       - Integer line number
    #   phrase_start - true when the boundary character is '/', false for '+'
    #   word         - surface string in front of the first "(...)" group
    #   info         - Array of Hashes, one per "(...)" group, with keys
    #                  :hinshi_opt, :hinshi, :hyoujun, :hyoujun_henkei, :imi
    #
    # Items are Comparable: ordered by number, then by word.
    class MorphItem
      include Comparable

      # Regexp for Morph Line Parse /          (1)Hinshi-Option (2)Hinshi-Code       (3)Hyoujun-Hyouki     (4)Hyoujun-Hyouki-2   (5)Imi-zokusei
      MORPH_LINE_REGEXP = Regexp.new( '^\(' + '(\[[^\]]\])?' + '([0-9a-fA-F]{4}|[0-9a-fA-F]{8})' + '(?:,([^,\{\}]+))?' + '((?:,[^,\{\}]+)*)' + '(?:,\{([^\}]+)\})?' + '\)$' )

      attr_accessor :number
      attr_accessor :phrase_start
      attr_accessor :word
      attr_accessor :info

      # Builds an item from one result line. With nil, or with a line that
      # does not look like "<n>. /body" / "<n>. +body", an empty item is
      # created (number 0, phrase_start false, word "", info []).
      #
      # Raises ArgumentError when the body is exactly "nil" (terminator
      # line) and MorphAnalyzeError when a "(...)" group cannot be parsed.
      def initialize( result_line = nil )
        # Init
        @number = 0
        @phrase_start = false
        @info = Array.new
        @word = ""

        return if result_line.nil?
        parsed = /^(\d+)\. ([\/+])(.*)$/.match( result_line )
        return if parsed.nil?

        # Store the number as an Integer so it agrees with the default (0),
        # with Morph#renumber! and with the Integer branch of #<=>.
        @number = parsed[1].to_i
        @phrase_start = ( parsed[2] == '/' )
        body = parsed[3].gsub( /,,/, ',' ) # ad-hoc fix

        # Make Word Info
        if body == 'nil' then
          raise ArgumentError, 'terminator line (nil) is not a morph item'
        end

        groups = body.scan( /\([^\)]+\)/ )
        if groups.empty? then
          @word = body
        else
          head = /^([^(]+)\(/.match( body )
          # word stays nil when the body starts directly with "("
          @word = head.nil? ? nil : head[1]
          @info = groups.collect{ |info_data| parse_info( info_data ) }
        end
      end

      # Formats the item back into "<number>. <boundary><word>(...)..." form.
      # The output is not necessarily identical to the parsed line: a
      # hyoujun equal to the word, and henkei entries equal to the word,
      # are omitted.
      def to_s
        info_str = @info.collect{ |info_data| format_info( info_data ) }.join( '' )
        boundary = @phrase_start ? '/' : '+'
        @number.to_s + '. ' + boundary + @word + info_str
      end

      # Deep-copy dup method: the copy shares no String, Array or Hash with
      # the receiver (strings inside :hyoujun_henkei are copied as well).
      def my_dup
        new_mitem = Jpp::Morph::MorphItem.new( nil )
        new_mitem.number = @number
        new_mitem.phrase_start = @phrase_start
        if ! @word.nil? then
          new_mitem.word = @word.dup
        end
        new_mitem.info = info.collect{ |info_data|
          new_info_data = Hash.new
          info_data.each{ |key, value|
            new_info_data[key] = deep_copy_value( value )
          }
          new_info_data
        }
        return new_mitem
      end

      # for Comparable
      # Against another MorphItem: compare number, then word on a tie.
      # Against anything else: compare number with the object itself.
      def <=>( other )
        if ! other.kind_of?( Jpp::Morph::MorphItem ) then
          return @number <=> other
        end
        if @number == other.number then
          return @word <=> other.word
        end
        return @number <=> other.number
      end

      private

      # Parses one "(...)" group into the info Hash stored in @info.
      def parse_info( info_data )
        fields = MORPH_LINE_REGEXP.match( info_data )
        if fields.nil? then
          raise MorphAnalyzeError, 'Illegal data format morph data!(%s)' % info_data
        end

        # Group 4 always participates in the match (possibly as ""), and
        # holds ",a,b" style text: split it and drop the leading empty piece.
        if fields[4].empty? then
          hyoujun_henkei = nil
        else
          hyoujun_henkei = fields[4].split( /,/ ).reject{ |item| item.empty? }
        end

        {
          :hinshi         => fields[2], # 1100, 7530, 1A00, etc...
          :hyoujun        => fields[3],
          :hyoujun_henkei => hyoujun_henkei,
          :imi            => fields[5],
          :hinshi_opt     => fields[1]  # [P], [U], [*], etc...
        }
      end

      # Formats one info Hash back into its "(...)" text form.
      def format_info( info_data )
        fields = Array.new
        if info_data[:hinshi_opt].nil? then
          fields << info_data[:hinshi]
        else
          fields << info_data[:hinshi_opt] + info_data[:hinshi]
        end
        if @word != info_data[:hyoujun] then
          fields << info_data[:hyoujun]
        end
        if ! info_data[:hyoujun_henkei].nil? then
          fields += info_data[:hyoujun_henkei].reject{ |item| item == @word }
        end
        if ! info_data[:imi].nil? then
          fields << '{' + info_data[:imi] + '}'
        end
        '(' + fields.compact.join( ',' ) + ')'
      end

      # Copies an info value: nil stays nil, Arrays are copied element by
      # element, everything else is dup'ed.
      def deep_copy_value( value )
        if value.nil? then
          nil
        elsif value.kind_of?( Array ) then
          value.collect{ |item| deep_copy_value( item ) }
        else
          value.dup
        end
      end

    end
  end
end

