售前信息平台
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

75 lines
2.2 KiB

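-- This file is the main entry point for page scraping.
-- The parser must be loaded as a module here; otherwise it seems the JS file cannot be loaded dynamically.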
parser = require('parser')
--- Splash entry point: open the target page, run the parser script on it,
-- then click through the pager and parse every following page.
--
-- The `{{$...}}` placeholders are presumably substituted by a templating
-- step before this script is handed to Splash (verify against the caller).
--
-- @param splash  Splash scripting object.
-- @param args    Script arguments; not used by this function.
-- @return Array-like table with one `parser.select` result per parsed page.
function main(splash, args)
  -- All working state is local so nothing leaks into the global environment.
  local pages = {{$pages}}
  local scripts_js = '{{$scripts_js}}'
  local page_element = '{{$page_element}}'
  local wait_for = '{{$wait_for}}'
  local announcement_type = '{{$announcement_type}}'

  -- Return values of go / wait_for_element / runjs are deliberately not
  -- checked here: a failed load or a timeout does not abort the script.
  splash:go('{{$url}}')
  wait_for_element(splash, wait_for)
  wait_for_element(splash, page_element)

  -- Parameters passed to the JavaScript parser script.
  local params_js = { announcement_type = announcement_type }

  -- Add the first page's result to the returned result set.
  local results = {}
  local result = parser.select(splash, scripts_js, params_js)
  table.insert(results, result)

  -- Page through the remaining pages. When `pages` is 1 (or less) the loop
  -- body never runs, so no separate single-page branch is needed.
  for _ = 2, pages do
    -- Locate the pager element in the page and send it a click event.
    local js = string.format("document.querySelector('%s').click();", page_element)
    splash:runjs(js)

    -- Wait for the page to finish loading.
    wait_for_element(splash, wait_for)
    wait_for_element(splash, page_element)
    -- An explicit delay appears to be required here; without it the page
    -- may not have finished updating when it is parsed.
    assert(splash:wait(5))

    result = parser.select(splash, scripts_js, params_js)
    table.insert(results, result)
  end

  return results
end
--- Block until an element matching a CSS selector exists in the page.
--
-- Hands a JavaScript snippet to `splash:wait_for_resume`. The snippet
-- re-checks `document.querySelector` every 200 ms and reports an error once
-- `maxwait` seconds have elapsed without a match.
--
-- @param splash   Splash scripting object.
-- @param css      CSS selector; it is pasted into a single-quoted JS string.
-- @param maxwait  Optional timeout in seconds; 10 is used when nil.
-- @return Whatever `splash:wait_for_resume` returns.
function wait_for_element(splash, css, maxwait)
  local timeout = maxwait
  if timeout == nil then
    timeout = 10
  end

  -- Kept flush-left so the text sent to the browser stays exactly the same.
  local poll_template = [[
function main(splash) {
var selector = '%s';
var maxwait = %s;
var end = Date.now() + maxwait*1000;
function check() {
if(document.querySelector(selector)) {
splash.resume('Element found');
} else if(Date.now() >= end) {
var err = 'Timeout waiting for element';
splash.error(err + " " + selector);
} else {
setTimeout(check, 200);
}
}
check();
}
]]

  local snippet = string.format(poll_template, css, timeout)
  return splash:wait_for_resume(snippet)
end