You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
1.7 KiB
59 lines
1.7 KiB
#!/usr/bin/python3

"""Main entry point of the crawler.

Author: Chen Jinqian
Date: 2023/11/03
"""
|
|
|
|
import pymysql
|
|
import datetime
|
|
import time
|
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
|
from properties import Properties
|
|
from crawler import Crawler
|
|
|
|
# Startup banner printed to the console when the collector launches.
# NOTE: the banner text is user-facing runtime output and is kept verbatim.
print(
"""采购信息采集器 v1.0
===================================================================================
这个程序用于获取各大招投标网站的采购信息
version: 1.0
作者:陈进钱
日期:2023-11-04
===================================================================================""")

# Environment switch: set is_test to True when running in the test
# environment (selects the test config file and test database below).
is_test = False
|
|
|
|
# Pick the JDBC properties file and the target database by environment.
if is_test:
    file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyantest'
else:
    file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyan'

# Read the database credentials from the Java-style jdbc.properties file.
# The host is fixed to localhost; only username/password come from the file.
props = Properties(file_path)
host = 'localhost'
user = props.get('jdbc.username')
password = props.get('jdbc.password')

# Open the database connection shared by the crawler jobs below.
connect = pymysql.connect(host = host, user = user, password = password, database = database)

# The crawler fetches procurement notices and writes them into the database.
crawler = Crawler(connect)
|
|
|
|
# Scheduled wrapper for the daily consistency/check pass.
def crawl_check_func():
    """Run one check pass of the module-level crawler (scheduled daily)."""
    crawler.Check()
|
|
|
|
# Scheduled wrapper for the periodic crawl task.
def crawl_job_func():
    """Run one crawl pass of the module-level crawler (scheduled every 3 h)."""
    crawler.Crawl()
|
|
|
|
# Schedule the crawl jobs and run the scheduler in the foreground.
sched = BlockingScheduler()

# Full crawl every 3 hours; jitter randomizes start by up to 120 s so runs
# don't hit target sites at exactly the same minute. Up to 4 overlapping runs.
sched.add_job(crawl_job_func, 'interval', hours=3, jitter=120, max_instances=4)

# Consistency check once per day.
sched.add_job(crawl_check_func, 'interval', days=1, jitter=120, max_instances=4)

# BlockingScheduler.start() blocks until the scheduler shuts down and
# raises KeyboardInterrupt/SystemExit on Ctrl+C or kill. Without the
# try/finally the original `connect.close()` after start() was skipped
# whenever the process was interrupted, leaking the DB connection.
try:
    sched.start()
finally:
    # Always release the database connection on shutdown.
    connect.close()
|
|
|