chen jinqian
10 months ago
1 changed files with 71 additions and 0 deletions
@ -0,0 +1,71 @@ |
|||
#!/usr/bin/python3
"""Main program of the crawler.

Fetches procurement (tender/bidding) announcements from the major
procurement web sites and stores them in the MySQL database.

Author: Chen Jinqian
Date: 2023/11/03
"""

import pymysql
import datetime   # NOTE(review): appears unused in this file — confirm before removing
import time       # NOTE(review): appears unused in this file — confirm before removing
from apscheduler.schedulers.blocking import BlockingScheduler  # used only by the commented-out scheduler below
from properties import Properties
from crawler import Crawler
import sys
|||
|
|||
# Startup banner shown on the console when the collector launches.
# The text is kept verbatim (version, author, date are part of the message).
BANNER = """采购信息采集器 v1.0
===================================================================================
这个程序用于获取各大招投标网站的采购信息
version: 1.0
作者:陈进钱
日期:2023-11-04
==================================================================================="""
print(BANNER)
|||
|
|||
# Runtime environment selection. Set is_test = True when running against the
# test environment (selects the test database and test properties file).
is_test = False
print(sys.platform)
if sys.platform == 'win32':
    # Development box (Windows): point at the remote server.
    # NOTE(review): these three assignments are dead stores — they are
    # unconditionally overwritten by the properties-file block below
    # (host becomes 'localhost'); confirm which connection target the
    # win32 path is actually meant to use.
    # NOTE(review): hard-coded credentials committed to source control —
    # move them into a config file or environment variables.
    host = '116.62.210.190'
    user = 'root'
    password = 'Guoyan83086775'
    if is_test:
        database = 'guoyantest'
        file_path = "./jdbc.test.properties"
    else:
        database = 'guoyan'
        file_path = "./jdbc.properties"
else:
    # Server deployment (Linux): read the web application's JDBC properties.
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyantest'
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyan'

# Open the jdbc properties file and read the database connection settings.
# NOTE(review): this runs on every platform, so it clobbers the win32
# host/user/password values assigned above.
props = Properties(file_path)
host = 'localhost'
user = props.get('jdbc.username')
password = props.get('jdbc.password')
|||
|
|||
# Open the database connection using the settings selected above.
connect = pymysql.connect(host=host, user=user, password=password, database=database)
try:
    # Fetch the procurement information and write it into the database.
    crawler = Crawler(connect)
    #crawler.filter()
    #print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)'))
    crawler.Crawl()
    #crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
    #print(crawler.Check())

    # Scheduled crawling (currently disabled): would re-run Crawl() every
    # hour with up to 120 s of jitter.
    #def crawl_job_func():
    #    crawler.Crawl()

    #sched = BlockingScheduler()
    #sched.add_job(crawl_job_func, 'interval', hours=1, jitter=120)
    #sched.start()
finally:
    # Always release the database connection, even if the crawl raised —
    # previously an exception in Crawl() leaked the open connection.
    connect.close()
Loading…
Reference in new issue