module DateExtractor

Constants

DAY_RE
DAY_RE1
DAY_RE2
DAY_RE3
END_CHINESE_CHARACTER_TIME_RE
NUMBER_RE
ONLY_DAY_RE

NOTE: Use `(?!(?:間)|(?:ほど))` to reject `~日間` and `~日ほど`

RANGE_RE
RE
START_CHINESE_CHARACTER_TIME_RE

NOTE: Use `(?!間)` to reject `“〜時間”`

TIMESLOT_RE
TIMESLOT_RE1
TIMESLOT_RE2
TIMESLOT_RE3
TIMESLOT_RE4
VERSION
WDAY_RE

Public Class Methods

extract(body, fallback_month: nil, fallback_year: nil, debug: false) click to toggle source

@param [String] body @param [Integer | NilClass] fallback_month @param [Integer | NilClass] fallback_year @param [Boolean] debug @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]]] matched strings and dates

# File lib/date_extractor.rb, line 120
# Extracts dates, and the time slots written after them, from free text.
#
# Every day expression matched by RE produces one result entry. Every time
# slot matched by TIMESLOT_RE that lies after a valid day expression and
# before the next valid day expression produces one more entry for that day.
#
# @param [String] body text to scan
# @param [Integer | NilClass] fallback_month month handed to days_from_matches
#   for days written without a month (defaults to the current month)
# @param [Integer | NilClass] fallback_year year handed to days_from_matches
#   for days written without a year (defaults to the current year)
# @param [Boolean] debug when true, entries whose date could not be built are
#   kept in the result (with a nil date) instead of being dropped
# @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]]]
#   matched strings and, at the same index, [date, start, end]
def extract(body, fallback_month: nil, fallback_year: nil, debug: false)
  # Take a single snapshot so month and year always belong to the same day.
  today = Date.today
  fallback_month ||= today.month
  fallback_year  ||= today.year

  day_matches = get_match_and_positions(body, RE)  # [[MatchData, start, end], ...]

  # [[MatchData, Date, DateTime, DateTime], [MatchData, nil, nil, nil], ...]
  day_with_hours = days_from_matches(day_matches.map(&:first), fallback_month, fallback_year, debug: debug)

  # timeslots_container[i] collects the time slots that belong to day_with_hours[i]
  timeslots_container = Array.new(day_with_hours.size) { [] }

  # Days whose Date could not be built (nil at index 1) never own a time slot
  # and never act as a boundary, however many of them appear in a row.
  valid_indices = day_with_hours.each_index.select { |i| !day_with_hours[i][1].nil? }

  get_match_and_positions(body, TIMESLOT_RE).each do |(timeslot_match, start_pos, end_pos)|
    valid_indices.each_with_index do |day_index, k|
      left_day   = day_with_hours[day_index]
      next_index = valid_indices[k + 1]
      right_day  = next_index && day_with_hours[next_index]

      # The slot must start at or after the end of the left day ...
      after_left = left_day[0].end(0) <= start_pos
      # ... and, unless the left day is the last valid one, end before the next valid day starts.
      before_right = right_day.nil? || end_pos <= right_day[0].begin(0)

      timeslots_container[day_index].push(timeslot_match) if after_left && before_right
    end
  end

  # Each day entry is followed by one entry per time slot attached to it
  days_from_timeslots = days_from_timeslot_matches(timeslots_container, day_with_hours)

  # Outside debug mode, drop entries whose date is nil
  days_from_timeslots = days_from_timeslots.reject { |(_, day, _, _)| day.nil? } unless debug

  result_strs      = days_from_timeslots.map { |(match, _, _, _)| match&.[](0) }
  result_datetimes = days_from_timeslots.map { |(_, day, start_t, end_t)| [day, start_t, end_t] }

  [result_strs, result_datetimes]
end

Private Class Methods

create_datetime_if_exists(year, month, day, hour, min) click to toggle source
# File lib/date_extractor.rb, line 220
# Builds a DateTime for the given calendar day and clock time.
#
# `hour` and `min` are converted with `to_i`, so a nil `min` counts as 0.
#
# @return [DateTime | NilClass] nil when `hour` is nil or when the
#   conversion/construction raises (e.g. a day or hour that does not exist)
def create_datetime_if_exists(year, month, day, hour, min)
  return nil if hour.nil?

  DateTime.new(year, month, day, hour.to_i, min.to_i)
rescue
  nil
end
days_from_matches(matches, fallback_month, fallback_year, debug: false) click to toggle source

@return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]] If month is not specified, fallback_month is used as month. This value is updated by discovering other month specification. Same for fallback_year.

# File lib/date_extractor.rb, line 236
# Turns day matches into [match, date, start, end] tuples.
#
# Year and month are carried over between matches: when a match names one,
# that value is used and also becomes the fallback for the following
# matches; when it does not, the current fallback is used.
#
# @param [[MatchData]] matches day matches, in text order
# @param [Integer] fallback_month month used until a match names one
# @param [Integer] fallback_year year used until a match names one
# @param [Boolean] debug accepted for interface compatibility; not used here
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
#   one tuple per match; date, start and end are all nil when the date does not exist
def days_from_matches(matches, fallback_month, fallback_year, debug: false)
  matches.map do |match|
    # A missing or unmatched :year group makes the lookup/normalization
    # raise; fall back to the last known year in that case. The value must be
    # an Integer, otherwise Date.new below rejects it.
    begin
      year = to_downer_letter(match[:year]).to_i
      fallback_year = year
    rescue
      year = fallback_year
    end

    # When ONLY_DAY_RE is used, month is nil
    begin
      month = to_downer_letter(match[:month]).to_i
      fallback_month = month
    rescue
      month = fallback_month
    end

    day = to_downer_letter(match[:day]).to_i

    begin
      date = Date.new(year, month, day)
    rescue
      date = nil
    end

    # A day that does not exist carries no times either
    next [match, nil, nil, nil] if date.nil?

    start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
    start_t = create_datetime_if_exists(year, month, day, start_hour, start_min)
    end_t   = create_datetime_if_exists(year, month, day, end_hour, end_min)

    [match, date, start_t, end_t]
  end
end
days_from_timeslot_matches(timeslots_container, day_with_hours) click to toggle source

The result contains each entry of day_with_hours, followed by one entry per time slot stored at the same index in timeslots_container.

# File lib/date_extractor.rb, line 275
# Interleaves day entries with the time slots attached to them.
#
# Each entry of `day_with_hours` is emitted unchanged; if its date is present,
# it is followed by one [timeslot_match, date, start, end] entry for every
# match stored at the same index in `timeslots_container`.
#
# @param [[[MatchData]]] timeslots_container time-slot matches per day index
# @param [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]] day_with_hours
# @return [[MatchData, Date | NilClass, DateTime | NilClass, DateTime | NilClass]]
def days_from_timeslot_matches(timeslots_container, day_with_hours)
  day_with_hours.each_with_index.flat_map do |entry, idx|
    date = entry[1]
    # Entries without a date never receive time slots
    next [entry] if date.nil?

    slot_entries = timeslots_container[idx].map do |slot|
      from_hour, from_min, to_hour, to_min = get_hour_from_timeslot_match(slot)

      [
        slot,
        date,
        create_datetime_if_exists(date.year, date.month, date.day, from_hour, from_min),
        create_datetime_if_exists(date.year, date.month, date.day, to_hour, to_min)
      ]
    end

    [entry, *slot_entries]
  end
end
get_hour_from_timeslot_match(match) click to toggle source
# File lib/date_extractor.rb, line 184
# Reads the start/end hour and minute captures out of a match.
#
# Each value is the named capture passed through to_downer_letter. When
# reading a capture raises (the group is not defined in the pattern or did
# not take part in the match), the value is nil, except that a minute
# becomes 30 when the corresponding half-hour capture is '半'.
#
# @param [MatchData] match
# @return [[String | NilClass, String | Integer | NilClass, String | NilClass, String | Integer | NilClass]]
#   [start_hour, start_min, end_hour, end_min]
def get_hour_from_timeslot_match(match)
  # Normalized text of capture `group`; on failure 30 if `half_group`
  # captured '半', otherwise nil.
  read = lambda do |group, half_group = nil|
    begin
      to_downer_letter(match[group])
    rescue
      30 if half_group && match.names.include?(half_group) && match[half_group] == '半'
    end
  end

  [
    read.call(:start_hour),
    read.call(:start_min, 'start_half_hour_unit'),
    read.call(:end_hour),
    read.call(:end_min, 'end_half_hour_unit')
  ]
end
get_match_and_positions(body, re) click to toggle source
# File lib/date_extractor.rb, line 180
# Scans `body` with `re` and returns every match with its position.
#
# @param [String] body
# @param [Regexp] re
# @return [[MatchData, Integer, Integer]] for each match: the MatchData and
#   the begin/end offsets of the whole match
def get_match_and_positions(body, re)
  body.to_enum(:scan, re).map do
    found = Regexp.last_match
    [found, found.begin(0), found.end(0)]
  end
end
to_downer_letter(upper_or_downer_letter) click to toggle source
# File lib/date_extractor.rb, line 299
# Converts full-width digits (U+FF10..U+FF19) to their ASCII
# counterparts and leaves every other character untouched.
#
# The digits are written as \u escapes so the source stays correct even if
# the file's text is width-normalized.
#
# @param [String] upper_or_downer_letter text that may contain full-width digits
# @return [String] a new string with ASCII digits
# @raise [NoMethodError] when given nil; callers rely on this to detect a
#   capture group that did not match
def to_downer_letter(upper_or_downer_letter)
  upper_or_downer_letter.tr("\uFF10-\uFF19", '0-9')
end