Browse Source

更新宁波中介超市提取代码

master
chen jinqian 5 months ago
parent
commit
bbb8627db3
  1. 2
      dbsearch.py
  2. 82
      keywordflash.py
  3. 19
      main.py
  4. 4
      properties.py

2
dbsearch.py

@ -88,7 +88,7 @@ if __name__ == '__main__':
""")
# 设置运行环境。如果当前是测试环境,则将is_test设置为true
is_test = False
is_test = True
if is_test:
file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"

82
keywordflash.py

@ -0,0 +1,82 @@
#!/usr/bin/python3
import pymysql
from properties import Properties
import sys, getopt
class DbUpdate:
    """Keyword-driven update helpers that all run on one database connection.

    The connection is expected to follow the PyMySQL API: ``cursor()`` must
    return a cursor that works as a context manager and offers ``execute``,
    ``mogrify``, ``fetchone`` and ``fetchall``; the connection itself must
    offer ``commit``.

    Schema names passed as ``database`` are spliced into the SQL text because
    identifiers cannot be bound as parameters, so they must come from trusted
    configuration and never from user input.
    """

    def __init__(self, connect):
        """Store the open database connection used by every method."""
        self.connect = connect

    def GetPresaleKeyword(self, database):
        """Return the keywords stored under ``PresaleKeyword``.

        Reads the ``value`` column of the first ``<database>.sysconfigure``
        row whose ``fieldname`` is ``'PresaleKeyword'`` and splits it on
        commas after removing every space.

        Args:
            database: Trusted schema name that holds ``sysconfigure``.

        Returns:
            A list of non-empty keyword strings. The list is empty when the
            row is missing, its value is NULL, or it contains no keywords.
        """
        with self.connect.cursor() as cursor:
            cursor.execute(
                "SELECT value FROM " + database
                + ".sysconfigure where fieldname = 'PresaleKeyword';")
            row = cursor.fetchone()
        if row is None or row[0] is None:
            return []
        # The previous code called replace('', ','), which inserts a comma
        # between every character and so shredded each keyword into single
        # characters. NOTE(review): the intent was presumably to accept the
        # full-width comma (U+FF0C) as a separator - confirm with the data.
        raw = row[0].replace(' ', '').replace('\uff0c', ',')
        # Consecutive or trailing separators would otherwise yield '' entries.
        return [keyword for keyword in raw.split(',') if keyword]

    def GetColumnList(self, tableName, database=None):
        """Return ``(column_name, data_type)`` rows for one table.

        Args:
            tableName: Name of the table to describe.
            database: Schema that owns the table. When omitted, the schema
                currently selected on the connection (``DATABASE()``) is used.
                The previous code read an undefined local name here and only
                worked when a module-level ``database`` global happened to
                exist.

        Returns:
            Whatever ``fetchall()`` yields for the INFORMATION_SCHEMA query.
        """
        base_sql = ("SELECT column_name,data_type "
                    "FROM INFORMATION_SCHEMA.COLUMNS where table_schema=")
        if database is None:
            sql = base_sql + "DATABASE() AND table_name=%s"
            args = (tableName,)
        else:
            sql = base_sql + "%s AND table_name=%s"
            args = (database, tableName)
        with self.connect.cursor() as cursor:
            # Values are bound as parameters instead of concatenated.
            cursor.execute(sql, args)
            return cursor.fetchall()

    def KeywordFlash(self, keywords, database):
        """Reset ``glbj`` on every ``sc_cggg`` row, then flag keyword matches.

        First sets ``glbj = 0`` for all rows of ``<database>.sc_cggg``, then
        sets ``glbj = 1`` where ``<database>.findbykeyword(bt, keyword)`` holds
        for at least one keyword, and finally commits both statements.

        Args:
            keywords: Iterable of keyword strings. They are bound as query
                parameters, so quotes inside a keyword cannot break the SQL.
            database: Trusted schema name that holds ``sc_cggg`` and
                ``findbykeyword``.

        Returns:
            The text of the last statement executed: the flagging UPDATE with
            the keywords rendered by the driver, or the reset UPDATE when
            ``keywords`` is empty.
        """
        table = database + ".sc_cggg"
        reset_sql = "update " + table + " set glbj = 0"
        args = tuple(keywords)
        executed = reset_sql
        with self.connect.cursor() as cursor:
            cursor.execute(reset_sql)
            # With no keywords nothing can match. Skipping the second UPDATE
            # avoids sending a statement that ends in a dangling "where".
            if args:
                predicate = " OR ".join(
                    database + ".findbykeyword(bt,%s)" for _ in args)
                flag_sql = "update " + table + " set glbj = 1 where " + predicate
                # mogrify renders exactly what execute() will send, so the
                # printed and returned text still shows the real keywords.
                executed = cursor.mogrify(flag_sql, args)
                print(executed)
                cursor.execute(flag_sql, args)
        self.connect.commit()
        return executed
if __name__ == '__main__':
    # Script entry point: read the stored presale keywords and refresh the
    # keyword flags in the selected database.
    print(
"""
============================================================
|这是数据库全文检索工具包含两个参数 |
============================================================
""")
    # Select the runtime environment. Set is_test to True to run against the
    # test deployment and its database.
    is_test = False
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyantest'
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyan'
    # Read the database credentials from the jdbc.properties file.
    props = Properties(file_path)
    host = 'localhost'
    user = props.get('jdbc.username')
    password = props.get('jdbc.password')
    # Open the connection and make sure it is closed again even when one of
    # the queries fails; previously it was never closed.
    db = pymysql.connect(host=host, user=user, password=password, database=database)
    try:
        dbUpdate = DbUpdate(db)
        keywords = dbUpdate.GetPresaleKeyword(database)
        print(keywords)
        print(dbUpdate.KeywordFlash(keywords, database))
    finally:
        db.close()

19
main.py

@ -11,7 +11,6 @@ from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler
import sys
import os
print(
"""采购信息采集器 v1.0
@ -23,17 +22,7 @@ print(
===================================================================================""")
# 设置运行环境。如果当前是测试环境,则将is_test设置为true
is_test = True
if is_test:
root = "/opt/eresource_test/webapp/WEB-INF/classes/prod/"
else:
root = "/opt/eresource/webapp/WEB-INF/classes/prod/"
if os.path.exists(root):
file_path = root + "jdbc.properties"
else:
file_path = "jdbc.properties"
is_test = False
if sys.platform == 'win32':
host = '116.62.210.190'
user = 'root'
@ -44,13 +33,15 @@ if sys.platform == 'win32':
database = 'guoyan'
else:
if is_test:
file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
database = 'guoyantest'
else:
file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
database = 'guoyan'
# 打开jdbc.properties文件,获取数据库的配置信息
props = Properties(file_path)
host = '116.62.210.190'
host = 'localhost'
user = props.get('jdbc.username')
password = props.get('jdbc.password')
@ -59,6 +50,8 @@ connect = pymysql.connect(host = host, user = user, password = password, databas
# 获取采购信息,并填写到数据库中
crawler = Crawler(connect)
#crawler.filter()
#print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)'))
crawler.Crawl()
#crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
#print(crawler.Check())

4
properties.py

@ -9,10 +9,6 @@ import tempfile
class Properties:
def __init__(self, file_name):
# 如果配置文件不存在,取本地文件
if not os.path.exists(file_name):
file_name = 'jdbc.properties'
self.file_name = file_name
self.properties = {}
try:

Loading…
Cancel
Save