2009/7/20 月曜日

ldr の画像を先読みでキャッシュ(/api/unread?prefetch契機で)

カテゴリー: インターネット — staki @ 16:06:23

前に作った奴とは全然別物。

ldr 本体の本文先読みを proxy で掴まえて、中身のインラインイメージを同時に先読みする。先読みといっても polipo なり gethtmlw あたりの proxy キャッシュに入れるだけ。

これ自体が proxy になっているので、pac ファイルで http://reader.livedoor.com/api/unread(prefetch 付きのリクエスト)の時だけ経由するようにして使うとか、proxy を変更するナニかで適当に切り替えて使う。要するに ldr 専用串として。

http://gist.github.com/150186


#!/bin/ruby
# -*- coding: utf-8 -*-

require 'webrick'
require 'webrick/httpproxy'
require 'simple-json'
require 'zlib'
require 'stringio'
require 'rubygems'
require 'hpricot'
require 'net/telnet'
require 'open-uri'
require 'thread'

# Upstream proxy that the WEBrick proxy server forwards through
# (passed as :ProxyURI when the server is created further down).
$WEBRICK_PROXY = 'http://localhost:8123/'
# Proxy used by the image-prefetch worker threads for their open-uri requests.
$PROXY_ADDR = 'http://localhost:8123/'
# NOTE(review): not referenced anywhere else in this script — presumably a
# leftover from the mozrepl variant; confirm before relying on it.
$USE_MOZREPL = true
# Number of prefetch worker threads; also the range used when the handler
# picks a random worker queue.
$HTTP_THREAD_COUNT = 4

# Writes a diagnostic message to standard output.
def debugLog(str)
  $stdout.puts(str)
end

# One Queue per worker thread. The proxy handler pushes [image_url, referer]
# pairs onto one of these queues.
@q = []

# Starts one prefetch worker and registers its job queue in @q.
#
# The worker pops [url, referer] jobs and requests each URL through
# $PROXY_ADDR with the given Referer header. The response body is read and
# thrown away; the request itself is the point. A nil job makes the worker
# leave its loop.
#
# Returns the started Thread.
def makeHttpThread
  myq = Queue.new
  @q << myq
  Thread.start do
    while job = myq.pop
      url, referer = job
      begin
        OpenURI.open_uri(url, {"Referer" => referer,
                               :proxy => $PROXY_ADDR}) { |sio| sio.read }
      # StandardError rather than Exception: a failed download is logged and
      # skipped, but interpreter-level exceptions are no longer swallowed.
      rescue StandardError => e
        debugLog "ERR:" + e.to_s
      end
    end
  end
end

# Start the configured number of prefetch workers.
$HTTP_THREAD_COUNT.times do
  makeHttpThread
end

# Content handler given to the WEBrick proxy as :ProxyContentHandler.
#
# For responses to reader.livedoor.com /api/unread requests whose query string
# contains "prefetch" and whose content type is text/javascript, the JSON body
# is parsed and every absolute http: <img> URL found in the items' bodies is
# queued for a worker, paired with the item's link (used as the Referer).
#
# Prefetching is best effort: any failure while decoding or parsing is logged
# and swallowed so it cannot disturb the response being proxied.
handler = Proc.new() do |req,res|
  if req.unparsed_uri =~ %r!reader\.livedoor\.com! &&
      req.path =~ %r!/api/unread! &&
      req.query_string =~ %r!prefetch! &&
      res['content-type'] =~ %r!text/javascript!
    begin
      raw = res.body.to_s
      # Gunzip only when the body really starts with the gzip magic bytes;
      # an uncompressed response is parsed as it is.
      json = if raw.unpack('C2') == [0x1f, 0x8b]
               Zlib::GzipReader.wrap(StringIO.new(raw)) { |z| z.read }
             else
               raw
             end

      buf = JsonParser.new.parse(json)

      (buf['items'] || []).each do |item|
        Hpricot(item['body'].to_s).search("img").each do |elm|
          src = elm.attributes['src'].to_s
          next unless src =~ /^http:/
          debugLog "DEBUG URL PUSH1:" + src
          # Spread jobs over the workers by picking a queue at random.
          @q[rand($HTTP_THREAD_COUNT)].push [src, item['link']]
          debugLog "DEBUG URL PUSH2:" + src
        end
      end
    rescue StandardError => e
      debugLog "ERR:" + e.to_s
    end
  end
end

# Proxy server: uses $WEBRICK_PROXY as its upstream proxy and hands responses
# to `handler`.
s = WEBrick::HTTPProxyServer.new(
  :ProxyContentHandler => handler,
  :ProxyURI => URI.parse($WEBRICK_PROXY)
)

# Ctrl-C: tell every worker to leave its loop, then stop the server.
Signal.trap('INT') do
  # @q is an Array of Queues, so each worker needs its own nil sentinel;
  # pushing nil onto the Array itself would never reach a worker.
  @q.each { |queue| queue.push nil }
  s.shutdown
  # This script opens no telnet connection, so there is nothing else to close.
end

s.start

ついでに mozrepl で直接ブラウザキャッシュに入れようと目論んで上手く動かなかった物も。多分、送ってる javascript をマトモにすれば動くのかなと思うんだけど、そっちはよくわからない。

http://gist.github.com/150188


#!/bin/ruby
# -*- coding: utf-8 -*-

require 'webrick'
require 'webrick/httpproxy'
require 'simple-json'
require 'zlib'
require 'stringio'
require 'rubygems'
require 'hpricot'
require 'net/telnet'
require 'open-uri'
#require 'yaml'
require 'thread'

# Upstream proxy that the WEBrick proxy server forwards through
# (passed as :ProxyURI when the server is created further down).
$WEBRICK_PROXY = 'http://localhost:8080/'
# NOTE(review): not referenced anywhere else in this script — images are sent
# to mozrepl instead of being fetched through a proxy; confirm it can go.
$PROXY_ADDR = 'http://localhost:8080/'

# Writes a diagnostic message to standard output.
def debugLog(str)
  $stdout.puts(str)
end

# Jobs for the mozrepl worker: [image_url, referer] pairs. nil stops the worker.
$q = Queue.new

# Builds the JavaScript statement sent to mozrepl for one image URL.
#
# The URL comes from feed content, so it is escaped before being embedded in
# the double-quoted JavaScript string: backslashes and double quotes get a
# backslash, control characters (including newlines, which would end the
# line sent to the REPL) become \xNN escapes.
def mozreplImageJs(url)
  escaped = url.to_s.gsub(/[\\"\x00-\x1f\x7f]/) do |ch|
    if ch == '\\' || ch == '"'
      '\\' + ch
    else
      format('\\x%02x', ch.unpack('C').first)
    end
  end
  'var myImg = new Image();myImg.src = "' + escaped + '";'
end

# Worker: turns each queued image URL into an `Image` load inside the browser
# by sending JavaScript over the mozrepl connection in $telnet.
th = Thread.start do
  begin
    while str = $q.pop
      js = mozreplImageJs(str[0])
      begin
        $telnet.puts(js)
      rescue => e
        # Sending failed: log it and reconnect. The current job is not resent.
        debugLog "ERR:" + e.to_s
        initMozrepl()
      end
    end
  rescue =>e
    # Anything that escapes the loop (e.g. a failed reconnect) ends the worker.
    debugLog "EEERRR:" + e.to_s
  end
end

# Content handler given to the WEBrick proxy as :ProxyContentHandler.
#
# For responses to reader.livedoor.com /api/unread requests whose query string
# contains "prefetch" and whose content type is text/javascript, the JSON body
# is parsed and every absolute http: <img> URL found in the items' bodies is
# pushed onto $q together with the item's link.
#
# Prefetching is best effort: any failure while decoding or parsing is logged
# and swallowed so it cannot disturb the response being proxied.
handler = Proc.new() do |req,res|
  if req.unparsed_uri =~ %r!reader\.livedoor\.com! &&
      req.path =~ %r!/api/unread! &&
      req.query_string =~ %r!prefetch! &&
      res['content-type'] =~ %r!text/javascript!
    begin
      raw = res.body.to_s
      # Gunzip only when the body really starts with the gzip magic bytes;
      # an uncompressed response is parsed as it is.
      json = if raw.unpack('C2') == [0x1f, 0x8b]
               Zlib::GzipReader.wrap(StringIO.new(raw)) { |z| z.read }
             else
               raw
             end

      buf = JsonParser.new.parse(json)

      (buf['items'] || []).each do |item|
        Hpricot(item['body'].to_s).search("img").each do |elm|
          src = elm.attributes['src'].to_s
          next unless src =~ /^http:/
          debugLog "DEBUG URL PUSH1:" + src
          $q.push [src, item['link']]
          debugLog "DEBUG URL PUSH2:" + src
        end
      end
    rescue StandardError => e
      debugLog "ERR:" + e.to_s
    end
  end
end

# Opens the telnet connection to mozrepl on localhost:4242 and stores it in
# $telnet. Whatever the connection yields to the block is printed to stdout.
def initMozrepl
  # Used when images are handed to mozrepl.
  options = { "Host" => "localhost", "Port" => 4242 }
  $telnet = Net::Telnet.new(options) { |chunk| print chunk }
end

# Open the mozrepl connection before the proxy server is started.
initMozrepl()

# Proxy server: uses $WEBRICK_PROXY as its upstream proxy and hands responses
# to `handler`.
s = WEBrick::HTTPProxyServer.new(
  :ProxyContentHandler => handler,
  :ProxyURI => URI.parse($WEBRICK_PROXY)
)

# Ctrl-C: stop the worker, stop the server, and close the mozrepl connection.
Signal.trap('INT') do
  $q.push nil
  s.shutdown
  # The connection is kept in the global $telnet; a bare `telnet` is an
  # undefined local here and would raise NameError inside the trap.
  $telnet.close if $telnet
end

s.start

proxy.pac のサンプル

[code='jscript]
// PAC entry point: decides which proxy (if any) handles a request.
//  - reader.livedoor.com over http goes to the proxy on localhost:80
//    (presumably the Ruby prefetch proxy above; confirm the port it listens on)
//  - plain host names, ftp: URLs, 2ch.net and private/loopback/link-local
//    addresses are fetched directly
//  - everything else goes to the proxy on localhost:8123
function FindProxyForURL(url, host)
{
    var defaultProxy = "PROXY localhost:8123; DIRECT";
    var ldrProxy = "PROXY localhost:80; DIRECT";
    var direct = "DIRECT";
    // [network, netmask] pairs that must bypass the proxies, checked in order.
    var localNets = [
        ["10.0.0.0", "255.0.0.0"],
        ["127.0.0.0", "255.0.0.0"],
        ["169.254.0.0", "255.255.0.0"],
        ["192.168.0.0", "255.255.0.0"]
    ];
    var i;

    if (shExpMatch(url, "http://reader.livedoor.com/*")) {
        return ldrProxy;
    }

    if (isPlainHostName(host) ||
        url.substring(0, 4) == "ftp:" ||
        dnsDomainIs(host, "2ch.net")) {
        return direct;
    }

    for (i = 0; i < localNets.length; i++) {
        if (isInNet(host, localNets[i][0], localNets[i][1])) {
            return direct;
        }
    }

    return defaultProxy;
}
[/code]

Powered by WordPress

stakilog