#!/usr/bin/python3
"""Main program of the crawler.

Author: 陈进钱
Date: 2023/11/03
"""

import pymysql
import datetime
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler
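# Note: `properties` and `crawler` above are presumably local modules in this repository
# (a small .properties reader and the Crawler class), rather than PyPI packages.
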
print(
"""Procurement Information Collector v1.0
===================================================================================
This program collects procurement information from major bidding and tendering websites.
version: 1.0
Author: 陈进钱
Date: 2023-11-04
===================================================================================""")

# Configure the runtime environment. If this is the test environment, set is_test to True.
is_test = False

if is_test:
    file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyantest'
else:
    file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyan'

# Read the database configuration from the jdbc.properties file
props = Properties(file_path)
host = 'localhost'
user = props.get('jdbc.username')
password = props.get('jdbc.password')
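# Note: the exact file format is an assumption, not shown in this repository view.
# jdbc.properties is the deployed Java webapp's config and presumably holds
# key=value pairs such as:
#
#   jdbc.username=crawler
#   jdbc.password=secret
#
# so props.get() above returns the raw string value for each key.
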
# Open the database connection
connect = pymysql.connect(host=host, user=user, password=password, database=database)
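# Note (assumption about usage, not stated in the source): pymysql connections default
# to autocommit=False and are not thread-safe, so the Crawler class is presumably
# responsible for committing its own writes on this shared connection.
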
# Fetch procurement information and write it into the database
crawler = Crawler(connect)

# Scheduled task: run the crawler's periodic check
def crawl_check_func():
    crawler.Check()

# Scheduled task: start the automatic crawl
def crawl_job_func():
    crawler.Crawl()

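# Scheduling notes: the 'interval' trigger runs the crawl job every 3 hours and the
# check job once a day, with each run time randomized by up to 120 seconds (jitter);
# max_instances=4 allows up to four concurrent runs of the same job if one overruns.
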
sched = BlockingScheduler()
sched.add_job(crawl_job_func, 'interval', hours=3, jitter=120, max_instances=4)
sched.add_job(crawl_check_func, 'interval', days=1, jitter=120, max_instances=4)
sched.start()
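
# Note: BlockingScheduler.start() blocks in the foreground until the scheduler is
# shut down, so the cleanup below only runs after the scheduler stops.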
# Close the database connection
connect.close()