From bbb8627db35bf0ab2dfcb35729f7f894c863f672 Mon Sep 17 00:00:00 2001 From: chen jinqian Date: Mon, 22 Apr 2024 15:18:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=AE=81=E6=B3=A2=E4=B8=AD?= =?UTF-8?q?=E4=BB=8B=E8=B6=85=E5=B8=82=E6=8F=90=E5=8F=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dbsearch.py | 2 +- keywordflash.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 19 ++++-------- properties.py | 4 --- 4 files changed, 89 insertions(+), 18 deletions(-) create mode 100644 keywordflash.py diff --git a/dbsearch.py b/dbsearch.py index 8669831..f4f94c8 100644 --- a/dbsearch.py +++ b/dbsearch.py @@ -88,7 +88,7 @@ if __name__ == '__main__': """) # 设置运行环境。如果当前是测试环境,则将is_test设置为true - is_test = False + is_test = True if is_test: file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties" diff --git a/keywordflash.py b/keywordflash.py new file mode 100644 index 0000000..850bcae --- /dev/null +++ b/keywordflash.py @@ -0,0 +1,82 @@ +#!/usr/bin/python3 + +import pymysql +from properties import Properties +import sys, getopt + +class DbUpdate: + # 本类用于提供各类数据库信息搜索服务 + def __init__(self, connect): + self.connect = connect + + def GetPresaleKeyword(self, database): + # 查询某个库的数据表的列表 + cursorTable = self.connect.cursor() + cursorTable.execute("SELECT value FROM " + database + ".sysconfigure where fieldname = 'PresaleKeyword';"); + + keywords = cursorTable.fetchall()[0][0].replace(' ', '').replace(',', ',').split(',') + return keywords + + def GetColumnList(self, tableName): + # 查询某张表的数据字段列表 + cursorColumn = self.connect.cursor() + cursorColumn.execute("SELECT column_name,data_type FROM INFORMATION_SCHEMA.COLUMNS where table_schema='" + database + "' AND table_name='" + + tableName + "'"); + return cursorColumn.fetchall() + + def KeywordFlash(self, keywords, database): + # 生成更新SQL语句 + cursor = self.connect.cursor() + strSQL = "update " + database + ".sc_cggg set glbj = 0" + cursor.execute(strSQL) + + strSQL = "update " + database + ".sc_cggg set glbj = 1 where " + + count = len(keywords) + for keyword in keywords: + count = count - 1 + strSQL = strSQL + database + ".findbykeyword(bt,'" + keyword + "')" + if count > 0: + strSQL = strSQL + " OR " + + print(strSQL) + cursor.execute(strSQL) + self.connect.commit() + + return strSQL + +if __name__ == '__main__': + print( +""" +============================================================ +|这是数据库全文检索工具,包含两个参数 | +============================================================ +""") + + # 设置运行环境。如果当前是测试环境,则将is_test设置为true + is_test = False + + if is_test: + file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties" + database = 'guoyantest' + else: + file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties" + database = 'guoyan' + + # 打开jdbc.properties文件,获取数据库的配置信息 + props = Properties(file_path) + host = 'localhost' + user = props.get('jdbc.username') + password = props.get('jdbc.password') + + # 打开数据连接 + db = pymysql.connect(host = host, user = user, password = password, database = database) + + dbUpdate = DbUpdate(db) + keywords = dbUpdate.GetPresaleKeyword(database) + + + print(keywords) + + print(dbUpdate.KeywordFlash(keywords, database)) + diff --git a/main.py b/main.py index 91fc80e..de915d4 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,6 @@ from apscheduler.schedulers.blocking import BlockingScheduler from properties import Properties from crawler import Crawler import sys -import os print( """采购信息采集器 v1.0 @@ -23,17 +22,7 @@ print( ===================================================================================""") # 设置运行环境。如果当前是测试环境,则将is_test设置为true -is_test = True -if is_test: - root = "/opt/eresource_test/webapp/WEB-INF/classes/prod/" -else: - root = "/opt/eresource/webapp/WEB-INF/classes/prod/" - -if os.path.exists(root): - file_path = root + "jdbc.properties" -else: - file_path = "jdbc.properties" - +is_test = False if sys.platform == 'win32': host = '116.62.210.190' user = 'root' @@ -44,13 +33,15 @@ if sys.platform == 'win32': database = 'guoyan' else: if is_test: + file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties" database = 'guoyantest' else: + file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties" database = 'guoyan' # 打开jdbc.properties文件,获取数据库的配置信息 props = Properties(file_path) - host = '116.62.210.190' + host = 'localhost' user = props.get('jdbc.username') password = props.get('jdbc.password') @@ -59,6 +50,8 @@ connect = pymysql.connect(host = host, user = user, password = password, databas # 获取采购信息,并填写到数据库中 crawler = Crawler(connect) +#crawler.filter() +#print(crawler.is_hit('asdfjaslkf工程设计奥森囧(abc工程设计abc公司)')) crawler.Crawl() #crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"}) #print(crawler.Check()) diff --git a/properties.py b/properties.py index bd4c29f..4fa152d 100644 --- a/properties.py +++ b/properties.py @@ -9,10 +9,6 @@ import tempfile class Properties: def __init__(self, file_name): - # 如果配置文件不存在,取本地文件 - if not os.path.exists(file_name): - file_name = 'jdbc.properties' - self.file_name = file_name self.properties = {} try: