#!/usr/bin/python3
"""Main entry point of the procurement-notice crawler.

Selects the database connection settings for the current platform, opens a
MySQL connection, runs one crawl pass through ``Crawler`` and closes the
connection again.

Author: 陈进钱
Date: 2023/11/03
"""
import pymysql
import datetime
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler
import sys

print(
"""采购信息采集器 v1.0
===================================================================================
这个程序用于获取各大招投标网站的采购信息
version: 1.0
作者:陈进钱
日期:2023-11-04
===================================================================================""")

# Runtime environment switch: set is_test to True to target the test database.
is_test = False

print(sys.platform)
if sys.platform == 'win32':
    # Windows: connect to the remote database with fixed settings.
    # NOTE(review): plaintext credentials are committed here; they should be
    # moved to a config file or environment variable and rotated.
    host = '116.62.210.190'
    user = 'root'
    password = 'Guoyan83086775'
    if is_test:
        database = 'guoyantest'
        file_path = "./jdbc.test.properties"
    else:
        database = 'guoyan'
        file_path = "./jdbc.properties"
else:
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyantest'
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyan'

    # Read the database user name and password from jdbc.properties.
    # NOTE(review): the original indentation was lost; this assumes the
    # properties file is only read on non-Windows hosts, since reading it
    # unconditionally would overwrite the Windows settings above. Confirm.
    props = Properties(file_path)
    host = 'localhost'
    user = props.get('jdbc.username')
    password = props.get('jdbc.password')

# Open the database connection.
connect = pymysql.connect(host=host, user=user, password=password, database=database)
try:
    # Fetch procurement notices and store them in the database.
    crawler = Crawler(connect)
    #crawler.filter()
    #print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)'))
    crawler.Crawl()
    #crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
    #print(crawler.Check())

    # Start the periodic crawl job (currently disabled).
    #def crawl_job_func():
    #    crawler.Crawl()
    #sched = BlockingScheduler()
    #sched.add_job(crawl_job_func, 'interval', hours=1, jitter=120)
    #sched.start()
finally:
    # Close the connection even when the crawl raises, so it is not leaked.
    connect.close()