Browse Source

更新宁波中介超市提取代码

master
chen jinqian 5 months ago
parent
commit
18f7bfc998
  1. 71
      localmain.py

71
localmain.py

@ -0,0 +1,71 @@
#!/usr/bin/python3
"""Main program of the crawler.

Author: 陈进钱 (Chen Jinqian)
Date: 2023/11/03
"""
import pymysql
import datetime
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler
import sys
# Startup banner, written to stdout once when the script starts.
# The text is emitted exactly as before; it is only assembled line by line.
startup_banner = (
    "采购信息采集器 v1.0\n"
    "===================================================================================\n"
    "这个程序用于获取各大招投标网站的采购信息\n"
    "version: 1.0\n"
    "作者陈进钱\n"
    "日期2023-11-04\n"
    "==================================================================================="
)
print(startup_banner)
# Select the runtime environment. Set is_test to True to run against the
# test database instead of the production one.
is_test = False
print(sys.platform)

# The database name depends only on the test flag, not on the platform.
database = 'guoyantest' if is_test else 'guoyan'

if sys.platform == 'win32':
    # On Windows the connection settings are fixed values.
    # SECURITY(review): these credentials are hard-coded in source control;
    # they should be rotated and loaded from a secret store or environment.
    host = '116.62.210.190'
    user = 'root'
    password = 'Guoyan83086775'
    file_path = "./jdbc.test.properties" if is_test else "./jdbc.properties"
else:
    # On every other platform the credentials come from the web
    # application's jdbc.properties file.
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"

    # Open the jdbc.properties file and read the database settings from it.
    # NOTE(review): the original indentation was lost; this read is assumed
    # to belong to the non-Windows branch, because running it unconditionally
    # would overwrite the Windows host/user/password above - confirm.
    props = Properties(file_path)
    host = 'localhost'
    user = props.get('jdbc.username')
    password = props.get('jdbc.password')
# Open the database connection.
connect = pymysql.connect(host=host, user=user, password=password, database=database)
try:
    # Collect the procurement notices and write them to the database.
    crawler = Crawler(connect)
    #crawler.filter()
    #print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)'))
    crawler.Crawl()
    #crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
    #print(crawler.Check())

    # Optional: run the crawl as a recurring scheduled job instead of once.
    #def crawl_job_func():
    #    crawler.Crawl()
    #sched = BlockingScheduler()
    #sched.add_job(crawl_job_func, 'interval', hours=1, jitter=120)
    #sched.start()
finally:
    # Always close the database connection, even when the crawl raises,
    # so a failed run does not leak the connection.
    connect.close()
Loading…
Cancel
Save