{"id":22,"date":"2005-04-14T18:04:53","date_gmt":"2005-04-14T09:04:53","guid":{"rendered":"http:\/\/chinkoclub.s345.xrea.com\/blog.252523.net\/?p=22"},"modified":"2010-07-23T05:40:07","modified_gmt":"2010-07-22T20:40:07","slug":"spidering_hacks","status":"publish","type":"post","link":"http:\/\/252523.net\/blog\/archives\/spidering_hacks","title":{"rendered":"\u201cSpidering Hacks\u201d"},"content":{"rendered":"<p>&#8220;Practical Extraction and Report Language&#8221; \u76f4\u8a33\u3059\u308b\u3068\u300c\u5b9f\u7528\u7684\u62bd\u51fa\u30fb\u5831\u544a\u66f8\u4f5c\u6210\u8a00\u8a9e\u300d\u3068\u306a\u308b\u304c\u3001\u305d\u3053\u3092\u307e\u3068\u3081\u3066\u300cPerl\u300d\u3068\u30aa\u30b7\u30e3\u30ec\u306b\u547c\u3076\u306e\u304c\u901a\u306a\u8a00\u8a9e\u306a\u306e\u3060\u3051\u3069\u3001\u3053\u3053\u4e00\u30f6\u6708\u304f\u3089\u3044\u52c9\u5f37\u3092\u517c\u306d\u3066\u81ea\u5206\u7528\u306e\u5c0f\u3055\u306aweb\u30a2\u30d7\u30ea\u3092\u4f5c\u6210\u3057\u3066\u3044\u308b\u3002\u3053\u306e\u8a00\u8a9e\u81ea\u4f53\u306f\u304b\u306a\u308a\u6614\u304b\u3089\u5b58\u5728\u3059\u308b\u3082\u306e\u306e\u3001\u3060\u304b\u3089\u3068\u8a00\u3063\u3066\u307f\u3093\u306a\u304c\u901a\u3063\u3066\u304f\u308b\u308f\u3051\u3058\u3083\u306a\u3044\u3002\u3082\u3061\u308d\u3093\u73fe\u5f79\u306e\u8a00\u8a9e\u3060\u3057\u3001\u69d8\u3005\u306a\u5206\u91ce\u30fb\u5834\u6240\u3067\u4f7f\u308f\u308c\u3066\u3044\u308b\u3002<\/p>\n\n<!--more-->\n\n<p>\u305f\u3060\u3001\u6642\u4ee3\u306f\u63a8\u79fb\u3057\u3066\u3001\u65b0\u3057\u3044\u8a00\u8a9e\u304c\u751f\u307e\u308c\u3066\u306f\u53e4\u304f\u306a\u308b\u3002\u305d\u3093\u306a\u4e2d\u3067 CGI(Common Gateway Interface) \u304c\u4f7f\u308f\u308c\u308b\u3088\u3046\u306b\u306a\u3063\u3066\u304b\u3089\u3001\u73fe\u5728\u306e\u3088\u3046\u306bPHP\u304c\u51fa\u3066\u304d\u3066\u5e2d\u5dfb\u3059\u308b\u307e\u3067\u3001\u3053\u306e\u5e02\u5834\u306fPerl\u72ec\u5360\u72b6\u614b\u3060\u3063\u305f\u3088\u3046\u306b\u601d\u3046\u3002\u81ea\u5206\u306f\u30b9\u30af\u30ea\u30d7\u30c8\u8a00\u8a9e\u3067\u306fPHP\u304b\u3089\u5165\u3063\u305f\u30af\u30c1\u3060\u3051\u3069\u3082\u3001\u4ecaPerl\u3092\u89e6\u308b\u3088\u3046\u306b\u306a\u308a\u3001\u3053\u306e\u9aa8\u3063\u307d\u304f\u8089\u3063\u307d\u3044\u8a00\u8a9e\u304cweb\u3068\u3044\u3046\u4e16\u754c\u306e\u88cf\u65b9\u3067\u7a3c\u52d5\u3057\u307e\u304f\u3063\u3066\u3044\u305f\u306e\u3060\u3068\u601d\u3046\u3068\u76ee\u304b\u3089\u6c34\u304c\u3001\u3001\u3001\u30b3\u3001\u30b3\u30ec\u30ac\u3001\u3001\u30cb\u30f3\u30b2\u30f3\u30ce\u3001\u3001\u3001\u30ca\u30df\u30c0\u30c8\u30a4\u30a6\u30e2\u30ce\u30ab\u3002\u3068\u306a\u308b<\/p>\n\n<p>\u305d\u3057\u3066\u3001Perl\u306e\u5165\u9580\u66f8\u306b\u9078\u3093\u3060\u306e\u304c O&#8217;Reilly \u306e <a href=\"http:\/\/www.oreilly.co.jp\/books\/4873111870\/\">&#8220;Spidering Hacks&#8221;<\/a> \u306a\u308b\u672c\u3002\u307e\u3060Perl\u3092\u3055\u308f\u3063\u305f\u3053\u3068\u3082\u306a\u3044\u306e\u306b\u3001\u3053\u308c\u3092\u30c1\u30e7\u30a4\u30b9\u3059\u308b\u3068\u306f\u3001\u306a\u304b\u306a\u304b\u6b8a\u52dd\u306a\u5fc3\u304c\u3051\u3002\u4eca\u898b\u3066\u307f\u308b\u3068\u307b\u3068\u3093\u3069\u30e2\u30b8\u30e5\u30fc\u30eb\u3068web\u6280\u8853\u306e\u7d61\u307f\u5408\u308f\u305b\u3001web\u4e0a\u306b\u306fperl\u30e2\u30b8\u30e5\u30fc\u30eb\u306e\u826f\u8cea\u306a\u30b5\u30f3\u30d7\u30eb\u3084\u4f7f\u3044\u65b9\u306f\u5c11\u306a\u3044\u305f\u3081\u3001\u975e\u5e38\u306b\u53c2\u8003\u306b\u306a\u308b\u672c\u3067\u3057\u305f\u3002\u307e\u3001\u3044\u308f\u3086\u308b\u30ea\u30d5\u30a1\u30ec\u30f3\u30b9\u672c\u3068\u306f\u9055\u3063\u3066\u5b9f\u7528\u7684\u3002<\/p>\n\n<p>\u305d\u3046\u8a00\u3048\u3070\u3001\u5df7\u306b\u306f\u30ea\u30d5\u30a1\u30ec\u30f3\u30b9\u672c\u304c\u6570\u591a\u304f\u51fa\u56de\u3063\u3066\u308b\u3051\u3069\u3082\u3002\u3082\u3061\u308d\u3093\u81ea\u5206\u3082\u4f55\u518a\u304b\u306f\u6301\u3063\u3066\u308b\u3051\u3069\u3001\u983b\u7e41\u306b\u4f7f\u3046\u3053\u3068\u3063\u3066\u5c11\u306a\u3044\u3001\u3001\u3001\u7d50\u5c40\u30aa\u30f3\u30e9\u30a4\u30f3\u3067\u8abf\u3079\u305f\u308a\u3001\u81ea\u5206\u3067\u66f8\u304d\u51fa\u3057\u305f\u95a2\u6570\u30ea\u30b9\u30c8\u3068\u304b\u3067\u9593\u306b\u5408\u3046\u3093\u3060\u3051\u3069\u3002\u7d19\u5a92\u4f53\u3067\u6b32\u3057\u3044\u3082\u306e\u3068\u8a00\u3063\u305f\u3089\u3001A4\u30b5\u30a4\u30ba\u304f\u3089\u3044\u3067\u30da\u30e9\u30da\u30e9\u306b\u8584\u3044\u95a2\u6570\u8868\u3068\u304b\u5c5e\u6027\u8868\u304b\u306a\u3002\u7d50\u5c40\u306e\u3068\u3053\u306a\u3093\u3060\u3088\u306a\u3041\u3001\u5165\u9580\u7528\u306b\u30a8\u30a4\u30c3\u3068\u8cfc\u5165\u3057\u305f\u3082\u306e\u306e\u3001&#8221;\u91cd\u3057&#8221;\u304f\u3089\u3044\u306b\u3057\u304b\u4f7f\u3063\u3066\u306a\u3044\u3057\u3002\u5834\u6240\u306f\u53d6\u308b\u3057\u3001\u7a4d\u307f\u91cd\u306d\u3066\u304a\u304f\u3068\u96ea\u5d29\u304c\u8d77\u3053\u308b\u3057\u3002\u3042\u30fc\u3001\u3001\u81ea\u5206\u3067\u4f5c\u308a\u3083\u3044\u3044\u3093\u3060\u3001\u305d\u3046\u3057\u3088\u3063\u305f\u3089\u305d\u3046\u3057\u307e\u3057\u3087\u3002<\/p>\n\n<p>\u3053\u306e\u672c\u3067\u52c9\u5f37\u3057\u305f\u3001HTML::TreeBuilder, HTML::TokeParser \u306e\u3088\u3046\u306b\u3001HTML::Parser\u3092\u4f7f\u7528\u3057\u3066\u3044\u308b\u30e2\u30b8\u30e5\u30fc\u30eb\u306f\u5185\u90e8\u3067\u30c7\u30fc\u30bf\u3092UTF-8\u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u3057\u3066\u3057\u307e\u3046\u306e\u3067\u3001\u30e2\u30b8\u30e5\u30fc\u30eb\u304b\u3089\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\u3059\u308b\u3068\u304d\u306f decode \u3092\u4f7f\u3063\u3066\u3082\u3068\u306b\u623b\u3057\u307e\u3057\u3087\u3046\u3002\u3053\u306e\u4ef6\u3084perl\u306eUnicode\u307e\u308f\u308a\u306e\u30c8\u30e9\u30d6\u30eb\u306b\u3064\u3044\u3066\u306f<a href=\"http:\/\/naoya.dyndns.org\/~naoya\/mt\/archives\/000611.html\">Perl 5.8 \u4ee5\u964d\u306b\u304a\u3044\u3066\u306e Unicode \u6587\u5b57\u5217\u306e\u6271\u3044\u65b9 : NDO::Weblog:<\/a> \u3092\u53c2\u7167\u3055\u308c\u305f\u3057\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>&#8220;Practical Extraction and Report Language&#8221; \u76f4\u8a33\u3059\u308b\u3068\u300c\u5b9f\u7528\u7684\u62bd\u51fa\u30fb\u5831\u544a\u66f8\u4f5c\u6210\u8a00\u8a9e\u300d\u3068\u306a\u308b\u304c\u3001\u305d\u3053\u3092\u307e\u3068\u3081\u3066\u300cPerl\u300d\u3068\u30aa\u30b7\u30e3\u30ec\u306b\u547c\u3076\u306e\u304c\u901a\u306a\u8a00\u8a9e\u306a\u306e\u3060 [&#8230;]<\/p>","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[11],"tags":[],"class_list":["post-22","post","type-post","status-publish","format-standard","hentry","category-perl"],"_links":{"self":[{"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/posts\/22","targetHints":{"allow":["GET"]}}],"collection":[{"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/comments?post=22"}],"version-history":[{"count":2,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/posts\/22\/revisions"}],"predecessor-version":[{"id":359,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/posts\/22\/revisions\/359"}],"wp:attachment":[{"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/media?parent=22"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/categories?post=22"},{"taxonomy":"post_tag","embeddable":true,"href":"http:\/\/252523.net\/blog\/wp-json\/wp\/v2\/tags?post=22"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}