Browse Source

更新宁波中介超市提取代码

master
chen jinqian 5 months ago
parent
commit
bbb8627db3
  1. 2
      dbsearch.py
  2. 82
      keywordflash.py
  3. 19
      main.py
  4. 4
      properties.py

2
dbsearch.py

@ -88,7 +88,7 @@ if __name__ == '__main__':
""") """)
# 设置运行环境。如果当前是测试环境,则将is_test设置为true # 设置运行环境。如果当前是测试环境,则将is_test设置为true
is_test = False is_test = True
if is_test: if is_test:
file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties" file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"

82
keywordflash.py

@ -0,0 +1,82 @@
#!/usr/bin/python3
import pymysql
from properties import Properties
import sys, getopt
class DbUpdate:
    """Database helper that refreshes the keyword flag (``glbj``) on ``sc_cggg``.

    The instance wraps an already opened DB-API connection (the script opens
    it with ``pymysql``); every method runs its statements through that
    connection.
    """

    def __init__(self, connect):
        """Store the open database connection used by all queries."""
        self.connect = connect

    def GetPresaleKeyword(self, database):
        """Return the configured presale keywords as a list of strings.

        Reads the ``value`` column of ``<database>.sysconfigure`` for the row
        whose ``fieldname`` is ``'PresaleKeyword'``. Spaces are removed,
        full-width commas are treated like ASCII commas, and empty fragments
        are dropped. An absent or NULL configuration value yields ``[]``.

        ``database`` is spliced into the statement as an identifier, so it
        must come from trusted configuration, never from user input.
        """
        sql = ("SELECT value FROM " + database +
               ".sysconfigure where fieldname = 'PresaleKeyword';")
        cursor = self.connect.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            cursor.close()
        if not rows or rows[0][0] is None:
            return []
        # Normalise the full-width comma (U+FF0C) to an ASCII comma before
        # splitting; replacing the empty string would split every character.
        raw = rows[0][0].replace(' ', '').replace('\uff0c', ',')
        return [keyword for keyword in raw.split(',') if keyword]

    def GetColumnList(self, tableName, database=None):
        """Return ``(column_name, data_type)`` rows for one table.

        ``database`` selects the schema to inspect. When omitted, the
        connection's current default schema (MySQL ``DATABASE()``) is used, so
        the method no longer depends on a module-level ``database`` global.
        Both values are bound as query parameters.
        """
        base = ("SELECT column_name,data_type FROM INFORMATION_SCHEMA.COLUMNS "
                "where table_schema=")
        if database is None:
            sql = base + "DATABASE() AND table_name=%s"
            params = (tableName,)
        else:
            sql = base + "%s AND table_name=%s"
            params = (database, tableName)
        cursor = self.connect.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        finally:
            cursor.close()

    def KeywordFlash(self, keywords, database):
        """Re-flag the ``sc_cggg`` rows whose ``bt`` matches any keyword.

        First resets ``glbj`` to 0 on every row, then sets ``glbj`` to 1 where
        ``<database>.findbykeyword(bt, keyword)`` holds for at least one
        keyword, and commits. Keywords are bound as parameters, so quotes in a
        keyword cannot break the statement. With no (non-empty) keywords only
        the reset is executed and committed. On any error the transaction is
        rolled back and the exception is re-raised.

        Returns the text of the last statement executed (with the keyword
        values substituted), which is also printed.

        ``database`` is spliced into the statements as an identifier, so it
        must come from trusted configuration.
        """
        table = database + ".sc_cggg"
        reset_sql = "update " + table + " set glbj = 0"
        params = [keyword for keyword in keywords if keyword]
        condition = database + ".findbykeyword(bt,%s)"
        flag_sql = ("update " + table + " set glbj = 1 where " +
                    " OR ".join([condition] * len(params)))
        cursor = self.connect.cursor()
        try:
            cursor.execute(reset_sql)
            if params:
                # mogrify renders the statement exactly as it will be sent,
                # keeping the printed/returned SQL human readable.
                executed = cursor.mogrify(flag_sql, params)
                print(executed)
                cursor.execute(flag_sql, params)
            else:
                executed = reset_sql
                print(executed)
            self.connect.commit()
        except Exception:
            # Do not leave the reset half-applied if flagging fails.
            self.connect.rollback()
            raise
        finally:
            cursor.close()
        return executed
if __name__ == '__main__':
    # Script entry point: read the configured presale keywords and refresh
    # the keyword flag on the target database.
    print(
"""
============================================================
|这是数据库全文检索工具包含两个参数 |
============================================================
""")
    # Select the runtime environment. Set is_test to True when running
    # against the test deployment.
    is_test = False
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyantest'
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyan'
    # Load the database credentials from jdbc.properties.
    props = Properties(file_path)
    host = 'localhost'
    user = props.get('jdbc.username')
    password = props.get('jdbc.password')
    # Open the database connection and make sure it is closed again even if
    # one of the steps below raises.
    db = pymysql.connect(host=host, user=user, password=password, database=database)
    try:
        dbUpdate = DbUpdate(db)
        keywords = dbUpdate.GetPresaleKeyword(database)
        print(keywords)
        print(dbUpdate.KeywordFlash(keywords, database))
    finally:
        db.close()

19
main.py

@ -11,7 +11,6 @@ from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties from properties import Properties
from crawler import Crawler from crawler import Crawler
import sys import sys
import os
print( print(
"""采购信息采集器 v1.0 """采购信息采集器 v1.0
@ -23,17 +22,7 @@ print(
===================================================================================""") ===================================================================================""")
# 设置运行环境。如果当前是测试环境,则将is_test设置为true # 设置运行环境。如果当前是测试环境,则将is_test设置为true
is_test = True is_test = False
if is_test:
root = "/opt/eresource_test/webapp/WEB-INF/classes/prod/"
else:
root = "/opt/eresource/webapp/WEB-INF/classes/prod/"
if os.path.exists(root):
file_path = root + "jdbc.properties"
else:
file_path = "jdbc.properties"
if sys.platform == 'win32': if sys.platform == 'win32':
host = '116.62.210.190' host = '116.62.210.190'
user = 'root' user = 'root'
@ -44,13 +33,15 @@ if sys.platform == 'win32':
database = 'guoyan' database = 'guoyan'
else: else:
if is_test: if is_test:
file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
database = 'guoyantest' database = 'guoyantest'
else: else:
file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
database = 'guoyan' database = 'guoyan'
# 打开jdbc.properties文件,获取数据库的配置信息 # 打开jdbc.properties文件,获取数据库的配置信息
props = Properties(file_path) props = Properties(file_path)
host = '116.62.210.190' host = 'localhost'
user = props.get('jdbc.username') user = props.get('jdbc.username')
password = props.get('jdbc.password') password = props.get('jdbc.password')
@ -59,6 +50,8 @@ connect = pymysql.connect(host = host, user = user, password = password, databas
# 获取采购信息,并填写到数据库中 # 获取采购信息,并填写到数据库中
crawler = Crawler(connect) crawler = Crawler(connect)
#crawler.filter()
#print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)'))
crawler.Crawl() crawler.Crawl()
#crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"}) #crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
#print(crawler.Check()) #print(crawler.Check())

4
properties.py

@ -9,10 +9,6 @@ import tempfile
class Properties: class Properties:
def __init__(self, file_name): def __init__(self, file_name):
# 如果配置文件不存在,取本地文件
if not os.path.exists(file_name):
file_name = 'jdbc.properties'
self.file_name = file_name self.file_name = file_name
self.properties = {} self.properties = {}
try: try:

Loading…
Cancel
Save