class Javlibrary

Constants

JAVLIBRARY_URL
VERSION

Attributes

database[RW]
password[RW]
url[RW]
username[RW]

Public Class Methods

new(database_name = 'javlibrary', user = 'root', pwd = 'default') click to toggle source
# File lib/javlibrary.rb, line 12
# Stores the MySQL connection settings and selects the site host.
#
# database_name - schema name, kept in @database
# user          - MySQL account name, kept in @username
# pwd           - MySQL password, kept in @password
#
# @url is set to the entry at index 1 of JAVLIBRARY_URL.
def initialize(database_name = 'javlibrary', user = 'root', pwd = 'default')
    # Connection settings read later when a client is opened.
    @database, @username, @password = database_name, user, pwd

    # Host used when building request URLs.
    @url = JAVLIBRARY_URL[1]
end

Public Instance Methods

actor_hash() click to toggle source
# File lib/javlibrary.rb, line 164
# Loads the actor table into a lookup keyed by actor name.
#
# Returns a Hash mapping actor_name (converted to String) to actor_id.
# The connection obtained from #client is closed even when the query
# raises; the exception itself is passed on to the caller.
def actor_hash
    db = client
    begin
        db.query("SELECT * FROM actor").each_with_object({}) do |row, lookup|
            # to_s keeps the earlier behaviour of turning a nil name into "".
            lookup[row['actor_name'].to_s] = row['actor_id']
        end
    ensure
        db.close
    end
end
author_page_num(nokogiri_doc) click to toggle source
# File lib/javlibrary.rb, line 216
# Reads the number of the last results page from a listing document.
#
# nokogiri_doc - parsed document responding to #search
#
# Returns the integer formed from the text after the final "=" in the
# href of the "page last" link, or 1 when no such link is present.
def author_page_num(nokogiri_doc)
    last_links = nokogiri_doc.search('//div[@class="page_selector"]/a[@class="page last"]')

    # Each match replaces the running value, so the final match wins.
    last_links.inject(1) do |_current, link|
        link['href'].split("=").last.to_i
    end
end
client() click to toggle source
# File lib/javlibrary.rb, line 24
# Opens a new MySQL connection to 127.0.0.1.
#
# The username, password and database come from @username, @password and
# @database, each converted to a String. Returns the new Mysql2::Client;
# nothing here closes it, so the caller must.
def client
    Mysql2::Client.new(
        host: "127.0.0.1",
        username: @username.to_s,
        password: @password.to_s,
        database: @database.to_s
    )
end
download_all_actors()
Alias for: get_all_actor
download_all_genres()
Alias for: genres_insert
download_all_video_labels() click to toggle source
# File lib/javlibrary.rb, line 322
# Runs select_actor once for every letter 'A' through 'Z', each in its own
# thread, and waits for all of them to finish.
#
# Returns the array of joined threads.
def download_all_video_labels
    workers = ('A'..'Z').map do |letter|
        Thread.new { select_actor(letter) }
    end

    # Block until every letter has been processed.
    workers.map(&:join)
end
download_all_videos() click to toggle source
# File lib/javlibrary.rb, line 146
# Processes every row of the label table whose video_download is 0,
# sequentially and in random order.
#
# Each row's video_num and video_label are passed to video_info_insert
# along with the actor and genre lookups. An error raised for one row is
# swallowed so the remaining rows are still attempted.
#
# Returns the shuffled array of rows.
def download_all_videos
    db = client
    pending = db.query("SELECT * FROM label WHERE video_download=0")
    db.close

    # Copy the rows into an array and randomise the processing order.
    queue = pending.to_a.shuffle

    actors = actor_hash
    categories = genre_hash

    queue.each do |row|
        begin
            video_info_insert(row['video_num'], row['video_label'], actors, categories)
        rescue StandardError
            # Best effort: skip this row and carry on.
            next
        end
    end
end
download_all_videos_thread() click to toggle source
# File lib/javlibrary.rb, line 112
# Threaded variant of the video download: rows of the label table with
# video_download = 0 are split into groups of up to 5000 and each group is
# processed by its own thread.
#
# Inside a thread every row is handed to video_info_insert; an error for
# one row is swallowed so the rest of the group continues.
#
# Returns the array of joined threads.
def download_all_videos_thread
    db = client
    pending = db.query("SELECT video_num, video_label FROM label WHERE video_download=0")
    db.close

    # One batch per worker thread.
    batches = pending.each_slice(5000).to_a

    actors = actor_hash
    categories = genre_hash

    workers = batches.map do |batch|
        Thread.new do
            batch.each do |row|
                begin
                    video_info_insert(row['video_num'], row['video_label'], actors, categories)
                rescue StandardError
                    # Best effort: move on to the next row.
                    next
                end
            end
        end
    end

    workers.map(&:join)
end
download_video_label(actor_id) click to toggle source
# File lib/javlibrary.rb, line 267
# Collects the video labels listed for one actor and queues them in the
# label table.
#
# actor_id - value placed in the "s" query parameter of the listing URL
#
# The first listing page is fetched to learn the page count (via
# author_page_num), then pages 1..last are fetched and the text after the
# final "=" of every video link is collected. Each label is inserted with
# video_download = '0'; an insert that fails is skipped.
#
# A request is attempted at most max_attempts times. When the first page
# cannot be fetched the actor is skipped; when a later page cannot be
# fetched only that page is skipped. Both cases are reported with warn
# instead of retrying forever.
#
# Returns nil.
def download_video_label(actor_id)
    max_attempts = 3
    firsturl = "http://www.#{@url}/ja/vl_star.php?s=#{actor_id}"
    baseurl = "http://www.#{@url}/ja/vl_star.php?&mode=&s=#{actor_id}&page="

    attempts = 0
    begin
        response = RestClient.get(firsturl)
    rescue StandardError => e
        attempts += 1
        retry if attempts < max_attempts
        warn "download_video_label: giving up on #{firsturl} (#{e.class}: #{e.message})"
        return nil
    end

    # Same "page last" parsing the actor listing uses.
    last_page = author_page_num(Nokogiri::HTML(response.body))

    labels = []
    1.upto(last_page) do |page|
        page_url = baseurl + page.to_s
        page_attempts = 0
        begin
            page_response = RestClient.get(page_url)
        rescue StandardError => e
            page_attempts += 1
            retry if page_attempts < max_attempts
            warn "download_video_label: skipping #{page_url} (#{e.class}: #{e.message})"
            next
        end

        Nokogiri::HTML(page_response.body).search('//div[@class="video"]/a').each do |row|
            # The label is whatever follows the last "=" in the link target.
            labels << row['href'].split("=")[-1]
        end
    end

    # Nothing to store, so do not open a connection at all.
    return nil if labels.empty?

    db = client
    begin
        labels.each do |label|
            begin
                # Labels come from scraped HTML, so escape them before interpolation.
                db.query("INSERT INTO label (video_label, video_download) VALUES ('#{db.escape(label.to_s)}', '0')")
            rescue StandardError
                # A failed insert is skipped.
                next
            end
        end
    ensure
        db.close
    end
    nil
end
downloader(identifer) click to toggle source
# File lib/javlibrary.rb, line 31
# Fetches one video page and flattens its details into a "$"-delimited
# string.
#
# identifer - value placed in the "v" query parameter of the page URL
#
# The request uses a randomly chosen Mechanize user-agent alias and
# 2-second open/read timeouts. A timeout is retried, but only up to
# max_attempts requests in total; any other error gives up immediately.
#
# Returns nil when the page could not be fetched, otherwise a String of
# the form
#   title$id$date$director$maker$label$cast$genres$img_url
# where the five fields after the title are the first five "text" cells of
# the info table, cast is the last such cell, genres is every genre name
# followed by a space, and img_url is the jacket image's src.
def downloader(identifer)
    max_attempts = 3
    baseurl = "http://www.#{@url}/cn/?v=#{identifer}"

    agent = Mechanize.new
    # sample picks from however many aliases exist; a fixed index range
    # could land past the end of the list and yield nil.
    agent.user_agent = Mechanize::AGENT_ALIASES.values.sample
    agent.read_timeout = 2
    agent.open_timeout = 2

    attempts = 0
    begin
        agent.get baseurl
    rescue Timeout::Error
        attempts += 1
        retry if attempts < max_attempts
        return nil
    rescue StandardError
        return nil
    end

    doc = Nokogiri::HTML(agent.page.body)

    video_title = doc.search('div[@id="video_title"]/h3/a').children.text

    details = doc.search('//div[@id="video_info"]/div[@class="item"]/table/tr/td[@class="text"]').map do |cell|
        cell.children.text
    end

    # Every genre name is followed by a single space, including the last.
    video_genres = doc.search('//div[@id="video_genres"]/table/tr/td[@class="text"]/span[@class="genre"]/a').map do |link|
        "#{link.children.text} "
    end.join

    # The last matching image wins; an empty string is used when none match.
    video_jacket_img = String.new
    doc.search('//img[@id="video_jacket_img"]').each do |img|
        video_jacket_img = img['src']
    end

    # join renders nil entries as empty fields, as string interpolation would.
    [video_title, details[0], details[1], details[2], details[3], details[4],
     details[-1], video_genres, video_jacket_img].join('$')
end
genre_hash() click to toggle source
# File lib/javlibrary.rb, line 175
# Loads the category table into a lookup keyed by category name.
#
# Returns a Hash mapping category_name (converted to String) to
# category_id. The connection obtained from #client is closed even when
# the query raises; the exception itself is passed on to the caller.
def genre_hash
    db = client
    begin
        db.query("SELECT * FROM category").each_with_object({}) do |row, lookup|
            # to_s keeps the earlier behaviour of turning a nil name into "".
            lookup[row['category_name'].to_s] = row['category_id']
        end
    ensure
        db.close
    end
end
genres() click to toggle source
# File lib/javlibrary.rb, line 186
# Downloads the genre listing page and extracts the genre names.
#
# The request is attempted at most max_attempts times; if every attempt
# fails the last error is re-raised rather than retrying forever.
#
# Returns an Array of unique genre names in page order.
def genres
    max_attempts = 3
    agent = Mechanize.new

    attempts = 0
    begin
        agent.get "http://www.#{@url}/cn/genres.php"
    rescue StandardError
        attempts += 1
        retry if attempts < max_attempts
        raise
    end

    Nokogiri::HTML(agent.page.body)
        .search('//div[@class="genreitem"]/a')
        .map { |link| link.children.text }
        .uniq
end
genres_insert() click to toggle source
# File lib/javlibrary.rb, line 200
# Stores every genre name returned by #genres in the category table.
#
# The names are fetched before the database connection is opened, so no
# connection is held during the HTTP request. Each name is escaped before
# being placed in the SQL text; an insert that fails is skipped. The
# connection is always closed.
#
# Returns nil.
def genres_insert
    names = genres

    db = client
    begin
        names.each do |name|
            begin
                # Names are scraped text and may contain quotes.
                db.query("INSERT INTO category (category_name) VALUES ('#{db.escape(name.to_s)}')")
            rescue StandardError
                # A failed insert is skipped.
                next
            end
        end
    ensure
        db.close
    end
    nil
end
Also aliased as: download_all_genres
get_all_actor() click to toggle source
# File lib/javlibrary.rb, line 224
# Walks the actor listing for every prefix 'A'..'Z' and stores each actor
# in the actor table.
#
# For each letter the first listing page is fetched to learn the page
# count (via author_page_num); every page is then fetched and each link's
# text (name) and the text after the last "=" of its href (label) are
# inserted together with the letter. An insert that fails is skipped.
#
# A request is attempted at most max_attempts times. A letter whose first
# page cannot be fetched is skipped, and so is any individual page that
# cannot be fetched; both are reported with warn instead of retrying
# forever. The connection is always closed.
#
# Returns nil.
def get_all_actor
    max_attempts = 3
    firsturl = "http://www.#{@url}/cn/star_list.php?prefix="

    db = client
    begin
        'A'.upto('Z') do |alphabet|
            letter_url = firsturl + alphabet

            attempts = 0
            begin
                response = RestClient.get(letter_url)
            rescue StandardError => e
                attempts += 1
                retry if attempts < max_attempts
                warn "get_all_actor: skipping prefix #{alphabet} (#{e.class}: #{e.message})"
                next
            end

            last_page = author_page_num(Nokogiri::HTML(response.body))

            1.upto(last_page) do |page_num|
                page_url = "#{letter_url}&page=#{page_num}"

                page_attempts = 0
                begin
                    response_page = RestClient.get(page_url)
                rescue StandardError => e
                    page_attempts += 1
                    retry if page_attempts < max_attempts
                    warn "get_all_actor: skipping #{page_url} (#{e.class}: #{e.message})"
                    next
                end

                links = Nokogiri::HTML(response_page.body)
                                .search('//div[@class="starbox"]/div[@class="searchitem"]/a')
                links.each do |row|
                    name = row.text
                    label = row['href'].split("=")[-1]
                    begin
                        # Name and label are scraped text; escape before interpolation.
                        db.query("INSERT INTO actor (actor_name, actor_label, type) " \
                                 "VALUES ('#{db.escape(name.to_s)}', '#{db.escape(label.to_s)}', '#{alphabet}')")
                    rescue StandardError
                        # A failed insert is skipped.
                        next
                    end
                end
            end
        end
    ensure
        db.close
    end
    nil
end
Also aliased as: download_all_actors
select_actor(type) click to toggle source
# File lib/javlibrary.rb, line 312
# Downloads the video labels of every actor stored under one type.
#
# type - value matched against the actor table's type column
#
# The matching rows are copied into an array before the connection is
# closed, so iteration never touches a result tied to a closed connection.
# The connection is closed even when the query raises. Each row's
# actor_label is then passed to download_video_label.
#
# Returns the array of rows.
def select_actor(type)
    db = client
    begin
        rows = db.query("SELECT actor_label FROM actor WHERE type='#{db.escape(type.to_s)}'").to_a
    ensure
        db.close
    end

    rows.each do |row|
        download_video_label(row["actor_label"])
    end
end
video_info_insert(index, identifer, actor_hash, genres_hash) click to toggle source
# File lib/javlibrary.rb, line 70
# Downloads one video's details and stores them in the database.
#
# index       - numeric video_num of the label row; used as video_id and
#               placed in the SQL text unquoted
# identifer   - video label handed to #downloader
# actor_hash  - Hash of actor name => actor id
# genres_hash - Hash of genre name => category id
#
# Steps:
#   1. Fetch the "$"-delimited record from #downloader; give up when nil.
#   2. Insert the video row. If that fails, set the label row's
#      video_download to 0 and give up.
#   3. Link every known cast member (v2a) and genre (v2c); names missing
#      from the lookups and failed link inserts are skipped.
#   4. Set the label row's video_download to 1.
#
# The connection is opened only after the download succeeded and is closed
# on every exit path. All scraped strings are escaped before being placed
# in SQL text.
#
# Returns nil.
def video_info_insert(index, identifer, actor_hash, genres_hash)
    result = downloader(identifer)
    return nil if result.nil?

    # A limit of -1 keeps trailing empty fields, so a record with no cast,
    # genres or image still yields all nine positions.
    title, id, date, director, maker, label, cast_tmp, genres_tmp, img_url = result.split('$', -1)
    cast = cast_tmp.to_s.split
    genres = genres_tmp.to_s.split

    db = client
    begin
        quote = ->(value) { db.escape(value.to_s) }

        begin
            db.query("INSERT INTO video (video_id,video_name,license,url,director,label,date,maker) " \
                     "VALUES (#{index},'#{quote.call(title)}','#{quote.call(id)}'," \
                     "'#{quote.call(img_url)}','#{quote.call(director)}','#{quote.call(label)}'," \
                     "'#{quote.call(date)}','#{quote.call(maker)}')")
        rescue StandardError
            # Leave the label flagged as not downloaded.
            db.query("UPDATE label SET video_download=0 WHERE video_num=#{index}")
            return nil
        end

        cast.each do |name|
            actor_id = actor_hash[name]
            next if actor_id.nil?
            begin
                db.query("INSERT INTO v2a (v2a_fk_video,v2a_fk_actor) VALUES(#{index}, #{actor_id.to_i})")
            rescue StandardError
                next
            end
        end

        genres.each do |name|
            category_id = genres_hash[name]
            next if category_id.nil?
            begin
                db.query("INSERT INTO v2c (v2c_fk_video,v2c_fk_category) VALUES(#{index}, #{category_id.to_i})")
            rescue StandardError
                next
            end
        end

        db.query("UPDATE label SET video_download=1 WHERE video_num=#{index}")
    ensure
        db.close
    end
    nil
end