class Linkify

Constants

DEFAULT_OPTIONS
DEFAULT_SCHEMAS
TLDS_2CH_SRC_RE

RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)

TLDS_DEFAULT

DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead

Attributes

__compiled__[RW]
__index__[RW]
__last_index__[RW]
__schema__[RW]
__text_cache__[RW]
bypass_normalizer[RW]
re[RW]

Public Class Methods

new(schemas = {}, options = {}) click to toggle source

new LinkifyIt(schemas, options)

  • schemas (Object): Optional. Additional schemas to validate (prefix/validator)

Creates new linkifier instance with optional additional schemas. Can be called without `new` keyword for convenience.

By default understands:

  • `http(s)://…` , `ftp://…`, `mailto:…` & `//…` links

  • “fuzzy” links and emails (example.com, foo@bar.com).

`schemas` is an object, where each key/value describes protocol/rule:

  • __key__ - link prefix (usually, protocol name with `:` at the end, `skype:` for example). `linkify-it` makes sure that the prefix is not preceded by an alphanumeric char or symbol. Only whitespace and punctuation are allowed.

  • __value__ - rule to check tail after link prefix

    • String - just alias to existing rule

    • Object

      • validate - validator function (should return matched length on success), or `RegExp`.

      • normalize - optional function to normalize text & url of matched result (for example, for @twitter mentions).

`options`:

  • __fuzzyLink__ - recognize URLs without `http(s):` prefix. Default `true`.

  • __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts like version numbers. Default `false`.

  • __fuzzyEmail__ - recognize emails without `mailto:` prefix.

# File lib/linkify-it-rb/index.rb, line 304
# Builds a linkifier: layers the caller's schemas and options over the
# defaults, initialises the scan cache and compiles the regexps.
#
# schemas - Hash of additional prefix => rule definitions; nil is treated
#           as an empty Hash.
# options - Hash of recognition options merged over DEFAULT_OPTIONS; nil is
#           treated as an empty Hash.
#
# Unlike the JavaScript original there is no "call without `new`" shortcut
# and no guessing whether the first argument is really the options hash:
# to pass options you must also pass schemas.
def initialize(schemas = {}, options = {})
  schemas ||= {}
  options ||= {}

  @__opts__           = DEFAULT_OPTIONS.merge(options)

  # Cache of the last tested result. Used to skip repeating steps on the
  # next `match` call.
  @__index__          = -1
  @__last_index__     = -1 # Next scan position
  @__schema__         = ''
  @__text_cache__     = ''

  @__schemas__        = {}.merge!(DEFAULT_SCHEMAS).merge!(schemas)
  @__compiled__       = {}

  # Private copy, so that later changes to this instance's TLD list can
  # never reach the shared TLDS_DEFAULT constant.
  @__tlds__           = TLDS_DEFAULT.dup
  @__tlds_replaced__  = false

  @re                 = {}

  @bypass_normalizer  = false   # only used in testing scenarios

  compile
end

Public Instance Methods

add(schema, definition) click to toggle source

chainable LinkifyIt#add(schema, definition)

  • schema (String): rule name (fixed pattern prefix)

  • definition (String|RegExp|Object): schema definition

Add new rule definition. See constructor description for details.

# File lib/linkify-it-rb/index.rb, line 348
# Registers (or replaces) a single schema rule and recompiles.
#
# schema     - rule name, i.e. the fixed prefix that triggers the rule.
# definition - the rule itself; see the constructor description for the
#              accepted forms.
#
# Returns self, so calls can be chained.
def add(schema, definition)
  @__schemas__.store(schema, definition)
  compile
  self
end
compile() click to toggle source

Schemas compiler. Build regexps.

# File lib/linkify-it-rb/index.rb, line 117
# Schemas compiler. Rebuilds every regexp in @re and the per-schema
# validator/normalizer table in @__compiled__ from the current options,
# TLD list and schema definitions, then resets the scan cache.
#
# Raises ArgumentError when a schema definition is neither nil (disabled),
# a String (alias), nor a Hash whose :validate is a Regexp or Proc and whose
# :normalize is a Proc or absent.
def compile
  @re = build_re(@__opts__)

  # Define dynamic patterns. Work on a copy so @__tlds__ itself is untouched.
  tlds = @__tlds__.dup

  onCompile

  tlds.push(TLDS_2CH_SRC_RE) unless @__tlds_replaced__
  tlds.push(@re[:src_xn])

  src_tlds = tlds.join('|')
  @re[:src_tlds] = src_tlds

  {
    email_fuzzy:      :tpl_email_fuzzy,
    link_fuzzy:       :tpl_link_fuzzy,
    link_no_ip_fuzzy: :tpl_link_no_ip_fuzzy,
    host_fuzzy_test:  :tpl_host_fuzzy_test
  }.each do |target, template|
    # Block form of gsub inserts the TLD source verbatim; the string form
    # would re-interpret backslash sequences inside it.
    source      = @re[template].gsub('%TLDS%') { src_tlds }
    @re[target] = Regexp.new(source, Regexp::IGNORECASE)
  end

  #
  # Compile each schema
  #

  aliases = []

  @__compiled__ = {} # Reset compiled data

  # Ruby core has no `Error` class, so ArgumentError is raised; the value is
  # inspected because it is, by construction, not a String at this point.
  schema_error = lambda do |name, val|
    raise ArgumentError, "(LinkifyIt) Invalid schema \"#{name}\": #{val.inspect}"
  end

  @__schemas__.each do |name, val|
    # skip disabled methods
    next if val.nil?

    compiled = { validate: nil, link: nil }
    @__compiled__[name] = compiled

    case val
    when Hash
      validate = val[:validate]
      if validate.is_a?(Regexp)
        compiled[:validate] = createValidator(validate)
      elsif validate.is_a?(Proc)
        compiled[:validate] = validate
      else
        schema_error.call(name, val)
      end

      normalize = val[:normalize]
      if normalize.is_a?(Proc)
        compiled[:normalize] = normalize
      elsif !normalize
        compiled[:normalize] = createNormalizer
      else
        schema_error.call(name, val)
      end
    when String
      # Aliases are resolved after every real rule has been compiled.
      aliases.push(name)
    else
      schema_error.call(name, val)
    end
  end

  #
  # Compile postponed aliases
  #

  aliases.each do |an_alias|
    target = @__compiled__[@__schemas__[an_alias]]

    # Silently skip aliases whose target is missing, to avoid errors when
    # the target schema has been disabled.
    next unless target

    @__compiled__[an_alias][:validate]  = target[:validate]
    @__compiled__[an_alias][:normalize] = target[:normalize]
  end

  #
  # Fake record for guessed links
  #
  @__compiled__[''] = { validate: nil, normalize: createNormalizer }

  #
  # Build schema condition, and filter disabled & fake schemas. Entries
  # without a validator (the fake record and unresolved aliases) are left
  # out so the scanner never selects a rule it cannot run.
  #
  slist = @__compiled__
          .select { |name, rule| name.length > 0 && rule[:validate] }
          .keys
          .map { |name| escapeRE(name) }
          .join('|')

  # (?!_) cause 1.5x slowdown
  schema_src = '(^|(?!_)(?:[><\uff5c]|' + @re[:src_XPCc] + '))(' + slist + ')'

  # Ruby regexps have no "global" flag; scanning from an offset is done with
  # Regexp#match(text, pos), so both patterns only need case-insensitivity.
  @re[:schema_test]   = Regexp.new(schema_src, Regexp::IGNORECASE)
  @re[:schema_search] = Regexp.new(schema_src, Regexp::IGNORECASE)

  @re[:pretest]       = Regexp.new(
                            '(' + @re[:schema_test].source + ')|' +
                            '(' + @re[:host_fuzzy_test].source + ')|' + '@',
                            Regexp::IGNORECASE)

  #
  # Cleanup
  #

  resetScanCache
end
createNormalizer() click to toggle source
# File lib/linkify-it-rb/index.rb, line 108
# Builds the default normalizer callback.
#
# Returns a lambda taking (match, obj) that hands the match to
# obj.normalize and returns whatever that call returns.
def createNormalizer()
  ->(match, obj) { obj.normalize(match) }
end
createValidator(re) click to toggle source
# File lib/linkify-it-rb/index.rb, line 99
# Wraps a Regexp into a validator callback.
#
# re - Regexp applied to the part of the text that starts at `pos`.
#
# Returns a lambda taking (text, pos, obj) that yields the length of the
# regexp's match within text[pos..-1], or 0 when nothing matches. The third
# argument is accepted but not used.
def createValidator(re)
  lambda do |text, pos, obj|
    found = re.match(text.slice(pos..-1))
    found ? found[0].length : 0
  end
end
escapeRE(str) click to toggle source
# File lib/linkify-it-rb/index.rb, line 88
# Escapes regexp metacharacters so the string can be embedded literally in
# a regexp source.
#
# str - String to escape.
#
# Returns a new String in which each of . ? * + ^ $ [ ] \ ( ) { } | - is
# prefixed with a backslash.
def escapeRE(str)
  # A block is used because `$&` is JavaScript replacement syntax; in a Ruby
  # replacement string it is inserted literally instead of the matched text.
  str.gsub(/[\.\?\*\+\^\$\[\]\\\(\)\{\}\|\-]/) { |char| "\\#{char}" }
end
match(text) click to toggle source

LinkifyIt#match(text) -> Array|null

Returns an array of found link descriptions, or `null` on failure. We strongly recommend using [[LinkifyIt#test]] first, for best speed.

##### Result match description

  • __schema__ - link schema, can be empty for fuzzy links, or `//` for protocol-neutral links.

  • __index__ - offset of matched text

  • __lastIndex__ - index of the next char after the match end

  • __raw__ - matched text

  • __text__ - normalized text

  • __url__ - link, generated from matched text

# File lib/linkify-it-rb/index.rb, line 480
# Collects every link found in the text.
#
# text - String to scan.
#
# Returns an Array of match descriptions built by Match.createMatch, or nil
# when the text contains no link.
def match(text)
  shift  = 0
  result = []

  # Try to take previous element from cache, if .test() called before
  if @__index__ >= 0 && @__text_cache__ == text
    result.push(Match.createMatch(self, shift))
    shift = @__last_index__
  end

  # Cut head if cache was used (with shift == 0 this is the whole text)
  tail = text.slice(shift..-1)

  # Scan string until end reached. Each `test` call reports positions
  # relative to `tail`, so `shift` carries the offset into the full text.
  while test(tail)
    result.push(Match.createMatch(self, shift))

    tail   = tail.slice(@__last_index__..-1)
    shift += @__last_index__
  end

  # An empty Array is truthy in Ruby, so emptiness must be checked
  # explicitly to honour the documented nil result.
  result.empty? ? nil : result
end
normalize(match) click to toggle source

LinkifyIt#normalize(match)

Default normalizer (if schema does not define its own).

# File lib/linkify-it-rb/index.rb, line 544
# Default normalizer, applied in place to a match description.
#
# match - object exposing `schema`, `url` and `url=`.
#
# Does nothing when @bypass_normalizer is set. Otherwise a schemaless match
# gets an "http://" prefix, and a 'mailto:' match gets a "mailto:" prefix
# unless its url already starts with one (case-insensitive).
def normalize(match)
  return if @bypass_normalizer

  # Do minimal possible changes by default. Need to collect feedback prior
  # to move forward https://github.com/markdown-it/linkify-it/issues/1

  if match.schema.empty?
    match.url = "http://#{match.url}"
  end

  needs_mailto = match.schema == 'mailto:' && match.url !~ /^mailto\:/i
  match.url = 'mailto:' + match.url if needs_mailto
end
onCompile() click to toggle source

LinkifyIt#onCompile()

Override to modify basic RegExp-s.

# File lib/linkify-it-rb/index.rb, line 561
# Extension hook with an intentionally empty body: override it to modify
# the basic regexps. `compile` invokes it right after @re has been rebuilt
# by build_re and before the TLD-dependent patterns are derived from it.
def onCompile
end
pretest(text) click to toggle source

LinkifyIt#pretest(text) -> Boolean

Very quick check that can give false positives. Returns true if a link MAY exist. Can be used for speed optimization, when you need to check that a link does NOT exist.

# File lib/linkify-it-rb/index.rb, line 443
# Cheap pre-check against the combined @re[:pretest] pattern.
#
# text - String to probe.
#
# Returns true when the pattern matches anywhere in the text (a link may be
# present), false otherwise.
def pretest(text)
  (@re[:pretest] =~ text) ? true : false
end
resetScanCache() click to toggle source
# File lib/linkify-it-rb/index.rb, line 93
# Invalidates the cached scan result: @__index__ goes back to -1 (the
# "nothing found" marker checked by `match` and `test`) and the remembered
# text is cleared.
def resetScanCache
  @__index__      = -1
  @__text_cache__ = ''
end
set(options) click to toggle source

chainable LinkifyIt#set(options)

  • options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }

Set recognition options for links without schema.

# File lib/linkify-it-rb/index.rb, line 360
# Updates the recognition options for links without schema.
#
# options - Hash such as { fuzzyLink: true, fuzzyEmail: false }; its entries
#           are merged into the current options in place.
#
# Returns self, so calls can be chained.
def set(options)
  @__opts__.update(options)
  self
end
test(text) click to toggle source

LinkifyIt#test(text) -> Boolean

Searches linkifiable pattern and returns `true` on success or `false` on fail.

# File lib/linkify-it-rb/index.rb, line 369
# Searches the text for the first linkifiable pattern.
#
# text - String to scan.
#
# On success the hit is recorded in @__schema__, @__index__ (start offset)
# and @__last_index__ (offset just past the end); the text itself is
# remembered in @__text_cache__.
#
# Returns true when a link was found, false otherwise.
def test(text)
  # Reset scan cache
  @__text_cache__ = text
  @__index__      = -1

  # 0 is truthy in Ruby, so emptiness has to be asked for explicitly.
  return false if text.empty?

  # try to scan for link with schema - that's the most simple rule
  if @re[:schema_test] =~ text
    re         = @re[:schema_search]
    last_index = 0

    while (m = re.match(text, last_index))
      last_index = m.end(0)
      len        = testSchemaAt(text, m[2], last_index)
      next unless len > 0

      # m[1] is the separator captured in front of the schema prefix.
      @__schema__     = m[2]
      @__index__      = m.begin(0) + m[1].length
      @__last_index__ = m.begin(0) + m[0].length + len
      break
    end
  end

  # guess schemaless links
  if @__opts__[:fuzzyLink] && @__compiled__['http:']
    tld_pos = text.index(@re[:host_fuzzy_test])

    # if tld is located after found link - no need to check fuzzy pattern
    if tld_pos && (@__index__ < 0 || tld_pos < @__index__)
      fuzzy_re = @__opts__[:fuzzyIP] ? @re[:link_fuzzy] : @re[:link_no_ip_fuzzy]
      ml       = text.match(fuzzy_re)

      if ml
        shift = ml.begin(0) + ml[1].length

        # Only an earlier hit replaces what was already found.
        if @__index__ < 0 || shift < @__index__
          @__schema__     = ''
          @__index__      = shift
          @__last_index__ = ml.begin(0) + ml[0].length
        end
      end
    end
  end

  # guess schemaless emails
  if @__opts__[:fuzzyEmail] && @__compiled__['mailto:'] && text.include?('@')
    # We can't skip this check, because these cases are possible:
    # 192.168.1.1@gmail.com, my.in@example.com
    me = text.match(@re[:email_fuzzy])

    if me
      shift = me.begin(0) + me[1].length
      nextc = me.begin(0) + me[0].length

      # An email wins when it starts earlier, or starts at the same place
      # and extends further than the current hit.
      if @__index__ < 0 || shift < @__index__ ||
         (shift == @__index__ && nextc > @__last_index__)
        @__schema__     = 'mailto:'
        @__index__      = shift
        @__last_index__ = nextc
      end
    end
  end

  @__index__ >= 0
end
testSchemaAt(text, schema, pos) click to toggle source

LinkifyIt#testSchemaAt(text, name, position) -> Number

  • text (String): text to scan

  • name (String): rule (schema) name

  • position (Number): text offset to check from

Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly at given position. Returns length of found pattern (0 on fail).

# File lib/linkify-it-rb/index.rb, line 456
# Runs one schema's validator at an exact position in the text.
#
# text   - String to scan.
# schema - rule (schema) name; looked up lower-cased in @__compiled__.
# pos    - text offset to check from.
#
# Returns 0 when no rule is compiled under that name; otherwise whatever
# the rule's validator returns for (text, pos, self).
def testSchemaAt(text, schema, pos)
  rule = @__compiled__[schema.downcase]

  # If not supported schema check requested - terminate
  return 0 unless rule

  rule[:validate].call(text, pos, self)
end
tlds(list, keepOld) click to toggle source

chainable LinkifyIt#tlds(list [, keepOld]) -> this

  • list (Array): list of tlds

  • keepOld (Boolean): merge with current list if `true` (`false` by default)

Load (or merge) new tlds list. Those are used for fuzzy links (without prefix) to avoid false positives. By default this algorithm is used:

  • hostname with any 2-letter root zones are ok.

  • biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф are ok.

  • encoded (`xn--…`) root zones are ok.

If list is replaced, then exact match for 2-chars root zones will be checked.

# File lib/linkify-it-rb/index.rb, line 524
# Loads (or merges) a new TLD list and recompiles.
#
# list    - Array of TLD strings; a single non-Array value is wrapped into a
#           one-element Array.
# keepOld - when true, merge with the current list; when false (the
#           default), replace it and mark the list as replaced so that
#           `compile` stops adding the generic 2-character TLD pattern.
#
# Returns self, so calls can be chained.
def tlds(list, keepOld = false)
  list = list.is_a?(Array) ? list : [list]

  if keepOld
    # `+` builds a new Array; concatenating in place would also modify
    # whatever other reference points at the current list (the constructor
    # seeds it from the shared TLDS_DEFAULT constant).
    @__tlds__ = (@__tlds__ + list).sort.uniq.reverse
  else
    @__tlds__          = list.dup
    @__tlds_replaced__ = true
  end

  compile
  self
end