You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
76 lines
2.2 KiB
76 lines
2.2 KiB
6 months ago
|
-- This file is the main entry point for page scraping.
|
||
|
-- The parser has to be loaded as a module here; otherwise the JS files apparently cannot be loaded dynamically.
|
||
|
parser = require('parser')
|
||
|
|
||
|
--- Splash entry point: scrape the first page, then click through the pager
-- and scrape every following page.
--
-- The `{{$...}}` placeholders are presumably substituted by a templating
-- step before the script is handed to Splash (NOTE(review): confirm against
-- the caller that renders this file).
--
-- @param splash the Splash scripting object
-- @param args   request arguments (not used by this script)
-- @return a list holding one `parser.select` result per scraped page
function main(splash, args)
  -- Keep everything local so nothing leaks into the global environment.
  local pages = {{$pages}}
  local scripts_js = '{{$scripts_js}}'
  local page_element = '{{$page_element}}'
  local wait_for = '{{$wait_for}}'
  local announcement_type = '{{$announcement_type}}'

  splash:go('{{$url}}')
  -- Best effort: the return values are ignored, so a timeout here does not
  -- abort the scrape.
  wait_for_element(splash, wait_for)
  wait_for_element(splash, page_element)

  -- Parameters handed to the JavaScript extraction scripts.
  local params_js = { announcement_type = announcement_type }

  -- Add the result of the first page to the returned result set.
  local results = {}
  local first = parser.select(splash, scripts_js, params_js)
  table.insert(results, first)

  -- Paging: locate the pager element and send it a click() event.
  -- The click script does not change between iterations, so build it once.
  -- Backslashes and single quotes are escaped because the selector is
  -- embedded in a single-quoted JavaScript string literal.
  local quoted_selector = page_element:gsub("[\\']", "\\%0")
  local click_js = string.format(
    "document.querySelector('%s').click();", quoted_selector)

  -- When pages is 1 the loop body never runs, so no special case is needed.
  for _ = 2, pages do
    splash:runjs(click_js)

    -- Wait for the page to finish loading.
    wait_for_element(splash, wait_for)
    wait_for_element(splash, page_element)

    -- An extra delay appears to be required here; without it the page may
    -- not have finished updating and the content is incomplete.
    assert(splash:wait(5))

    local result = parser.select(splash, scripts_js, params_js)
    table.insert(results, result)
  end

  return results
end
|
||
|
|
||
|
--- Wait until a CSS selector matches an element in the current page.
--
-- A JavaScript snippet is passed to `splash:wait_for_resume`; it polls
-- `document.querySelector` every 200 ms and either resumes with
-- 'Element found' or reports an error once `maxwait` seconds have elapsed.
--
-- @param splash  the Splash scripting object
-- @param css     CSS selector to wait for
-- @param maxwait maximum time to wait, in seconds (defaults to 10)
-- @return whatever `splash:wait_for_resume` returns
function wait_for_element(splash, css, maxwait)
  if maxwait == nil then
    maxwait = 10
  end

  -- The value is written into the script as a bare JS expression, so reject
  -- anything that is not numeric instead of emitting broken JavaScript.
  local seconds = tonumber(maxwait)
  if seconds == nil then
    error("wait_for_element: maxwait must be a number of seconds", 2)
  end

  -- The selector is embedded in a single-quoted JS string literal; escape
  -- the characters that would otherwise terminate or corrupt that literal
  -- (for example the quotes in a[href='x']).
  local js_escapes = {
    ["\\"] = "\\\\",
    ["'"] = "\\'",
    ["\n"] = "\\n",
    ["\r"] = "\\r",
  }
  local selector = tostring(css):gsub("[\\'\r\n]", js_escapes)

  return splash:wait_for_resume(string.format([[
    function main(splash) {
      var selector = '%s';
      var maxwait = %s;
      var end = Date.now() + maxwait*1000;

      function check() {
        if(document.querySelector(selector)) {
          splash.resume('Element found');
        } else if(Date.now() >= end) {
          var err = 'Timeout waiting for element';
          splash.error(err + " " + selector);
        } else {
          setTimeout(check, 200);
        }
      }
      check();
    }
  ]], selector, seconds))
end
|
||
|
|