Commit 1fb57e6877 ("first commit") by chen jinqian, 6 months ago, on branch master.

Files in this commit:
  1. .idea/.gitignore (3 lines)
  2. .idea/misc.xml (8 lines)
  3. .idea/modules.xml (8 lines)
  4. .idea/redis-manager-config.xml (6 lines)
  5. .idea/vcs.xml (6 lines)
  6. PyGuoyan.iml (9 lines)
  7. SYgcg.py (14 lines)
  8. __pycache__/crawler.cpython-310.pyc (binary)
  9. __pycache__/gymailer.cpython-310.pyc (binary)
  10. __pycache__/properties.cpython-310.pyc (binary)
  11. cert.crt (21 lines)
  12. config.ini (5 lines)
  13. crawler.py (714 lines)
  14. dbsearch.py (137 lines)
  15. gycrawler.py (59 lines)
  16. gymailer.py (44 lines)
  17. jdbc.properties (46 lines)
  18. main.py (75 lines)
  19. myrec.db (binary)
  20. properties.py (72 lines)
  21. splash/SYgcg.py (14 lines)
  22. splash/__init__.py (0 lines)
  23. splash/__pycache__/SYgcg.cpython-310.pyc (binary)
  24. splash/__pycache__/__init__.cpython-310.pyc (binary)
  25. splash/__pycache__/gysplash.cpython-310.pyc (binary)
  26. splash/config/splash.cnf (7 lines)
  27. splash/config/splash.json (20 lines)
  28. splash/configscripts/main.lua (1 line)
  29. splash/gysplash.py (122 lines)
  30. splash/scripts/main.lua (75 lines)
  31. splash/scripts/modules/jquery-3.7.1.min.js (2 lines)
  32. splash/scripts/modules/parser.lua (28 lines)
  33. splash/scripts/modules/ygcg.js (32 lines)
  34. splash/scripts/modules/zepto.js (1650 lines)
  35. splash/scripts/modules/zepto.js.1 (1650 lines)

3
.idea/.gitignore

@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml

8
.idea/misc.xml

@@ -0,0 +1,8 @@
<project version="4">
  <component name="Black">
    <option name="sdkName" value="Python 3.9 (PyGuoyan)" />
  </component>
  <component name="ProjectRootManager">
    <output url="file://$PROJECT_DIR$/out" />
  </component>
</project>

8
.idea/modules.xml

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/PyGuoyan.iml" filepath="$PROJECT_DIR$/PyGuoyan.iml" />
    </modules>
  </component>
</project>

6
.idea/redis-manager-config.xml

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PersistentConfig">
    <option name="langCode" value="en" />
  </component>
</project>

6
.idea/vcs.xml

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

9
PyGuoyan.iml

@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.9 (PyGuoyan)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

14
SYgcg.py

@@ -0,0 +1,14 @@
#!/usr/bin/python3
from splash.gysplash import SBase
import json

class SYgcg(SBase):
    def open(self):
        return super().open('ygcg', pages=2, annoucement_type='政府采购')

if __name__ == '__main__':
    test = SYgcg()
    r = test.open()
    results = json.loads(r.text)
    print(results)

BIN
__pycache__/crawler.cpython-310.pyc

Binary file not shown.

BIN
__pycache__/gymailer.cpython-310.pyc

Binary file not shown.

BIN
__pycache__/properties.cpython-310.pyc

Binary file not shown.

21
cert.crt

@@ -0,0 +1,21 @@
-----BEGIN CERTIFICATE-----
MIIDdTCCAl2gAwIBAgILBAAAAAABFUtaw5QwDQYJKoZIhvcNAQEFBQAwVzELMAkG
A1UEBhMCQkUxGTAXBgNVBAoTEEdsb2JhbFNpZ24gbnYtc2ExEDAOBgNVBAsTB1Jv
b3QgQ0ExGzAZBgNVBAMTEkdsb2JhbFNpZ24gUm9vdCBDQTAeFw05ODA5MDExMjAw
MDBaFw0yODAxMjgxMjAwMDBaMFcxCzAJBgNVBAYTAkJFMRkwFwYDVQQKExBHbG9i
YWxTaWduIG52LXNhMRAwDgYDVQQLEwdSb290IENBMRswGQYDVQQDExJHbG9iYWxT
aWduIFJvb3QgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDaDuaZ
jc6j40+Kfvvxi4Mla+pIH/EqsLmVEQS98GPR4mdmzxzdzxtIK+6NiY6arymAZavp
xy0Sy6scTHAHoT0KMM0VjU/43dSMUBUc71DuxC73/OlS8pF94G3VNTCOXkNz8kHp
1Wrjsok6Vjk4bwY8iGlbKk3Fp1S4bInMm/k8yuX9ifUSPJJ4ltbcdG6TRGHRjcdG
snUOhugZitVtbNV4FpWi6cgKOOvyJBNPc1STE4U6G7weNLWLBYy5d4ux2x8gkasJ
U26Qzns3dLlwR5EiUWMWea6xrkEmCMgZK9FGqkjWZCrXgzT/LCrBbBlDSgeF59N8
9iFo7+ryUp9/k5DPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8E
BTADAQH/MB0GA1UdDgQWBBRge2YaRQ2XyolQL30EzTSo//z9SzANBgkqhkiG9w0B
AQUFAAOCAQEA1nPnfE920I2/7LqivjTFKDK1fPxsnCwrvQmeU79rXqoRSLblCKOz
yj1hTdNGCbM+w6DjY1Ub8rrvrTnhQ7k4o+YviiY776BQVvnGCv04zcQLcFGUl5gE
38NflNUVyRRBnMRddWQVDf9VMOyGj/8N7yy5Y0b2qvzfvGn9LhJIZJrglfCm7ymP
AbEVtQwdpf5pLGkkeB6zpxxxYu7KyJesF12KwvhHhm4qxFYxldBniYUr+WymXUad
DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME
HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A==
-----END CERTIFICATE-----

5
config.ini

@@ -0,0 +1,5 @@
[database]
host = localhost
database = guoyantest
user = root
password = Guoyan83086775
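
Nothing in this commit actually reads config.ini (the crawler scripts read jdbc.properties instead), so the following reader is only a sketch of how the file could be consumed with the standard-library configparser; the section and key names come from the file above.

import configparser

config = configparser.ConfigParser()
config.read('config.ini')

db = config['database']                 # the [database] section
host = db.get('host', 'localhost')      # -> 'localhost'
database = db.get('database')           # -> 'guoyantest'
user = db.get('user')                   # -> 'root'
password = db.get('password')
print(host, database, user)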

714
crawler.py

@@ -0,0 +1,714 @@
#!/usr/bin/python3
"""
===========================================================================================
This module crawls procurement information.
Handling procurement announcements mainly involves four tables: sc_cggg, catalog,
catalogdata and readlog.
===========================================================================================
class Crawler:
    def __init__(self, connect):
    def generate_id(self):
    def write_log_information(self, data_id, catalog_name):
    def CrawlPage_gzw_ningbo(self, page):         # Ningbo SASAC municipal SOE bidding information
    def CrawlPage_zjcs_nbxzfw(self, type, page):  # Ningbo intermediary supermarket
    def CrawlPage_ygcg_nbcqjy_org(self, page):    # Ningbo sunshine procurement
    def CrawlPage_zfcg_czt_zj(self, page):        # Zhejiang government procurement network
    def CrawlPage_cbbidding(self, page):          # Ningbo Zhongji International Tendering Co., Ltd.
    def CrawlPage_zmeetb(self, page):             # Zhejiang International Tendering Co., Ltd.
    def CrawlPage_nbbidding(self, page):          # Ningbo International Tendering Co., Ltd.
============================================================================================
"""
import datetime
import hashlib
import pymysql
import json
import random
from requests_html import HTMLSession
from requests_html import HTML, UserAgent
import gymailer
import time

'''
============================================================
This class wraps the Splash rendering service.
Here:
    self.splash_ip is the IP address of the Splash service.
============================================================
'''
class Splash:
    def __init__(self):
        self.splash_ip = '127.0.0.1'

    '''
    ============================================================
    The wait_for parameter specifies the element to wait for: the call only returns
    once that element has been rendered, otherwise it waits up to 200 seconds.
    wait_for uses CSS selector syntax: an element id is written as "#app",
    a class as ".class-name".
    ============================================================
    '''
    def post(self, url, wait_for, pages=1, page_element='', headers={'content-type':'application/json','Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'}):
        lua_scripts = """
        function wait_for_element(splash, css, maxwait)
            -- Wait until a selector matches an element
            -- in the page. Return an error if waited more
            -- than maxwait seconds.
            if maxwait == nil then
                maxwait = 10
            end
            return splash:wait_for_resume(string.format([[
                function main(splash) {
                    var selector = '%s';
                    var maxwait = %s;
                    var end = Date.now() + maxwait*1000;
                    function check() {
                        if(document.querySelector(selector)) {
                            splash.resume('Element found');
                        } else if(Date.now() >= end) {
                            var err = 'Timeout waiting for element';
                            splash.error(err + " " + selector);
                        } else {
                            setTimeout(check, 200);
                        }
                    }
                    check();
                }
            ]], css, maxwait))
        end

        function main(splash, args)
            pages = """ + str(pages) + """
            page_element = '""" + page_element + """'
            wait_for = '""" + wait_for + """'
            splash:go('""" + url + """')
            wait_for_element(splash, wait_for)
            wait_for_element(splash, page_element)
            -- Add the first page to the result set
            results = {splash.html()}
            if pages == 1 then
                return results
            else
                -- Turn the pages:
                -- find the paging element on the page, then send a click() event to it
                for i = 2, pages do
                    -- js holds the javascript that fetches the paging element and sends the click event
                    js = string.format("document.querySelector('%s').click();", page_element)
                    -- Run the paging script
                    splash:runjs(js)
                    -- Wait for the page to finish loading
                    wait_for_element(splash, wait_for)
                    wait_for_element(splash, page_element)
                    -- A delay seems to be required here, otherwise the page update may not have finished
                    assert(splash:wait(5))
                    -- Add this page to the result set
                    table.insert(results, splash.html())
                end
                return results
            end
        end
        """
        splash_url = 'http://' + self.splash_ip + ':8050/execute'
        data = json.dumps({'lua_source':lua_scripts})
        r = HTMLSession().post(splash_url, headers=headers, data=data)
        return r

class Crawler:
    def __init__(self, connect):
        self.connect = connect

    def generate_id(self):
        # Generate a 32-character ID
        current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + str(random.randint(0, 1000000))
        md5_hash = hashlib.md5()
        md5_hash.update(current_time.encode('utf-8'))
        return md5_hash.hexdigest()

    def write_log_information(self, data_id, catalog_name, log_type='采购公告'):
        # A record has been added, so the related records must be kept in sync:
        # one catalogdata entry and one readlog entry.
        with self.connect.cursor() as cursor:
            affected_row = cursor.execute("select id from catalog where name = '%s'" % (log_type))
            if affected_row == 0:
                return False
            result = cursor.fetchall()
            catalog_id = result[0][0]
            catalogdata_id = self.generate_id()
            readlog_id = self.generate_id()
            affected_row = cursor.execute("SELECT staffid FROM userinfo where username = 'root'")
            if affected_row == 0:
                return False
            result = cursor.fetchall()
            staff_id = result[0][0]
            add_date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            affected_row = cursor.execute(
                'insert into catalogdata (id, dataid, catalogid, creatorid, menderid, adddate, modifydate, datastatus) values (%s, %s, %s, %s, %s, %s, %s, %s)',
                (catalogdata_id, data_id, catalog_id, staff_id, staff_id, add_date, add_date, 0))
            cursor.execute(
                'insert into readlog (id, dataid, staffid, readnum, adddate, LastAccessDate, resid) values (%s, %s, %s, %s, %s, %s, %s)',
                (readlog_id, data_id, staff_id, 1, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), catalog_id))
            return True

    def write_information(self, title, url, region, publishTime, announcementType):
        # Write one announcement into the database
        with self.connect.cursor() as cursor:
            cggg_id = self.generate_id()
            try:
                title = title.replace("'", "\\\'")
                affected_rows = cursor.execute(
                    'insert into sc_cggg (id, bt, lj, ssqy, fbsj, gglb) values (%s, %s, %s, %s, %s, %s)',
                    (cggg_id, title, url, region, publishTime, announcementType))
            except pymysql.err.IntegrityError:
                print('信息重复')
                self.connect.rollback()
                return False
            else:
                if self.write_log_information(cggg_id, announcementType):
                    self.connect.commit()
                else:
                    print('添加采购信息失败')
                    self.connect.rollback()
                    return False
            return True

    def write_information_cgyx(self, cgyx):
        # Write one procurement-intention record into the database
        with self.connect.cursor() as cursor:
            cgyx_id = self.generate_id()
            cgyx['cgxmmc'] = cgyx['cgxmmc'].replace("'", "\\\'")
            strSql = 'insert into sc_cgyx (id, cgxmmc, lj, cgxqqk, ysje, yjcgsj, ly) values (\''+cgyx_id+'\',\''+cgyx['cgxmmc']+'\',\''+cgyx['lj']+'\',\''+cgyx['cgxqqk']+'\',\''+cgyx['ysje']+'\',\''+cgyx['yjcgsj']+'\',\''+cgyx['ly']+'\')'
            try:
                affected_rows = cursor.execute(strSql)
            except pymysql.err.IntegrityError:
                print('信息重复')
                #self.connect.rollback()
                return False
            else:
                if self.write_log_information(cgyx_id, '采购意向'):
                    self.connect.commit()
                else:
                    print('添加采购信息失败')
                    self.connect.rollback()
                    return False
            return True

    def Check(self):
        # Verify that announcements were written during the last day; alert by mail otherwise
        with self.connect.cursor() as cursor:
            affected_row = cursor.execute("select id as total from sc_cggg where date(fbsj) > (NOW() - INTERVAL 1 DAY);")
            if affected_row == 0:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息', '采购信息提取不正常,请检查!')
                return False
            else:
                return True

    def Crawl(self):
        # Overall entry point that actually performs the crawling.
        # Crawl the Zhejiang government procurement network
        print('开始获取浙江政采网的信息\n')
        # Announcement-type parameters to pass in
        infoType = [
            {"announcementCode": "110-175885", "announcementType":"采购意向"},
            {"announcementCode": "110-978863", "announcementType":"采购公告"},
            {"announcementCode": "110-943756", "announcementType":"更正公告"},
            {"announcementCode": "110-420383", "announcementType":"非政府采购公告"},
            {"announcementCode": "110-900461", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 11):
                try:
                    self.CrawlPage_zfcg_czt_zj(page, typeParam)
                except Exception as e:
                    print('3--------------------------------', e)
        # Crawl the Ningbo sunshine procurement network
        print('开始获取宁波市阳光采购网的信息\n')
        infoType = [
            {"announcementCode": "21", "announcementType":"采购公告"},
            {"announcementCode": "23", "announcementType":"更正公告"},
            {"announcementCode": "22", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            try:
                self.CrawlPage_ygcg_nbcqjy_org(2, typeParam)
            except Exception as e:
                print('4--------------------------------', e)
        # Crawl the Ningbo intermediary supermarket network
        print('开始获取宁波市中介超市网的信息\n')
        infoType = [
            {"announcementCode": '1', "announcementType":"项目需求公告"},
            {"announcementCode": '2', "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 6):
                try:
                    self.CrawlPage_zjcs_nbxzfw(page, typeParam)
                except Exception as e:
                    print('5------------------------------', e)
        # Crawl procurement information of Ningbo SASAC municipal SOEs
        print('开始获取宁波市国资委市属企业招投标网的信息\n')
        for page in range(1, 5):
            try:
                self.CrawlPage_gzw_ningbo(page)
            except Exception as e:
                print('6------------------------------', e)
        # Crawl the Ningbo Zhongji international tendering network
        print('开始获取宁波中基国际招标网的信息\n')
        infoType = [
            {"announcementCode": "22", "announcementType":"采购公告"},
            {"announcementCode": "23", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 6):
                try:
                    self.CrawlPage_cbbidding(page, typeParam)
                except Exception as e:
                    print('7--------------------------------', e)
        # Crawl the Zhejiang international tendering network
        print('开始获取浙江国际招标网的信息\n')
        infoType = [
            {"announcementCode": "Zbgg", "announcementType":"采购公告"},
            {"announcementCode": "Gzgg", "announcementType":"更正公告"},
            {"announcementCode": "jggg", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 5):
                try:
                    self.CrawlPage_zmeetb(page, typeParam)
                except Exception as e:
                    print('8----------------------------', e)
        # Crawl the Ningbo International Tendering Co., Ltd. website
        print('开始获取宁波国际招标网的信息\n')
        # Announcement-type parameters to pass in
        infoType = [
            {"announcementCode": "1", "announcementType":"采购公告"},
            {"announcementCode": "1", "announcementType":"结果公告"},
            {"announcementCode": "2", "announcementType":"采购公告"},
            {"announcementCode": "2", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 5):
                try:
                    self.CrawlPage_nbbidding(page, typeParam)
                except Exception as e:
                    print('9--------------------------------', e)
        # Crawl the Ningbo Mingcheng tendering agency website
        print('开始获取宁波名城招标的信息\n')
        # Announcement-type parameters to pass in
        infoType = [
            {"announcementCode": "99", "announcementType":"采购公告"},
            {"announcementCode": "88", "announcementType":"结果公告"}
        ]
        for typeParam in infoType:
            for page in range(1, 2):
                try:
                    self.CrawlPage_nbmcbidding(page, typeParam)
                except Exception as e:
                    print('10--------------------------------', e)

    # Ningbo Zhongji International Tendering Co., Ltd. https://www.cbbidding.com/
    def CrawlPage_cbbidding(self, page, typeParam):
        # Crawl one page of announcements.
        session = HTMLSession()
        session.DEFAULT_RETRIES = 5
        url = 'https://www.cbbidding.com/Index/cms.html?mid=' +typeParam['announcementCode'] + '&%2FIndex%2Fcms%2Fmid%2F' + typeParam['announcementCode'] + '_html=&page=' + str(page)
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Connection": "keep-alive",
            "DNT": '1',
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        # This site returns an HTML page, so it has to be parsed
        r = session.get(url = url, headers = headers)
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波中基国际招标网', r.text)
            return False
        # Note: xpath() returns a list whose items are elements
        data = r.html.xpath('/html/body/div[3]/div[3]/div[2]/div[2]/div/ul/li')
        for item in data:
            title = item.xpath('//a')[0].text
            url = 'https://www.cbbidding.com' + item.xpath('//a')[0].attrs.get('href')
            region = '中基招标'
            publishDate = item.xpath('//div')[0].text
            try:
                publishDate = str(datetime.datetime.strptime(publishDate, '%Y-%m-%d'))
            except Exception as e:
                publishDate = publishDate.replace('.', '-')
                publishDate = str(datetime.datetime.strptime(publishDate, '%Y-%m-%d'))
            print(url, title)
            announcementType = typeParam['announcementType']
            #print(title, url, region, publishDate, announcementType)
            self.write_information(title, url, region, publishDate, announcementType)

    # Zhejiang International Tendering Co., Ltd. https://www.zmeetb.com/
    def CrawlPage_zmeetb(self, page, typeParam):
        # Crawl one page of announcements.
        session = HTMLSession()
        url = 'https://www.zmeetb.com/' +typeParam['announcementCode'] + '/index/p/' + str(page) + '.html'
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Cache-Control": "max-age=0",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Connection": "close",
            "DNT": '1',
            "Host": "www.zmeetb.com",
            "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="99"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "Windows",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        # This site returns an HTML page, so it has to be parsed.
        # Calling render() on this site runs into an SSL certificate problem;
        # the chromium certificate setup needs further study.
        #r = session.get(url = url, headers = headers, verify='/opt/PyGuoyan/www.zmeetb.com')
        r = session.get(url = url, headers = headers, verify=False)
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:浙江国际招标网', r.text)
            return False
        # Note: xpath() returns a list whose items are elements
        data = r.html.xpath('/html/body/div[1]/div[3]/div[2]/div/div/div[3]/div/ul/li/a')
        for item in data:
            title = item.xpath('//p')[0].text
            url = item.attrs.get('href')
            region = '浙江国际招标'
            publishDate = item.xpath('//p')[1].text
            announcementType = typeParam['announcementType']
            self.write_information(title, url, region, publishDate, announcementType)

    # Ningbo Mingcheng Tendering Co., Ltd. http://www.nbmcbidding.com/
    def CrawlPage_nbmcbidding(self, page, typeParam):
        # Crawl one page of announcements.
        session = HTMLSession()
        if typeParam['announcementType'] == '采购公告':
            url = "http://www.nbmcbidding.com/news/99/"+str(page)+"/"
        else:
            url = "http://www.nbmcbidding.com/news/88/"+str(page)+"/"
        data = {}
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Host": "www.nbmcbidding.com",
            'Connection': 'keep-alive',
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        r = session.get(url = url, headers = headers, json = data)
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波名诚招标代理有限公司', r.text)
            return False
        # Note: xpath() returns a list whose items are elements
        data = r.html.xpath('/html/body/div[1]/div/div[3]/div[2]/ul/li')
        for item in data:
            title = item.xpath('//a/div[2]')[0].text
            url = item.xpath('//a')[0].attrs.get('href')
            region = '宁波名诚招标'
            publishDate = item.xpath('//a/div[4]')[0].text
            announcementType = typeParam['announcementType']
            self.write_information(title, url, region, publishDate, announcementType)

    # Ningbo International Tendering Co., Ltd. http://www.nbbidding.com/
    def CrawlPage_nbbidding(self, page, typeParam):
        # Crawl one page of announcements.
        session = HTMLSession()
        if typeParam['announcementType'] == '采购公告':
            url = "http://www.nbbidding.com/Home/Notice/news_list?page="+str(page)+"&is_Open=1&keyword"
        else:
            url = "http://www.nbbidding.com/Home/Publicity/news_list?page="+str(page)+"&is_Open=1&keyword"
        data = {}
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Host": "www.nbbidding.com",
            'Connection': 'keep-alive',
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        r = session.get(url = url, headers = headers, json = data)
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波市国际招标网', r.text)
            return False
        data = json.loads(r.text)['data']
        total = data['page']['count']
        data = data['list']
        for item in data:
            id = item['id']
            if typeParam['announcementType'] == '采购公告':
                url = 'http://www.nbbidding.com/Home/Notice/news_detail?id=%s' % (id)
            else:
                url = 'http://www.nbbidding.com/Home/Publicity/news_detail?id=%s' % (id)
            title = item['title']
            region = '宁波国际招标'
            publishDate = item['addtime']
            announcementType = item['stage']
            self.write_information(title, url, region, publishDate, announcementType)
            print(publishDate, title, url)

    # Ningbo SASAC municipal SOE bidding information network
    def CrawlPage_gzw_ningbo(self, page):
        # Crawl one page of announcements.
        session = HTMLSession()
        url = 'http://gzw.ningbo.gov.cn/col/col1229663137/index.html?uid=6085425&pageNum=%s' % str(page)
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Connection": "keep-alive",
            "DNT": '1',
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        # This site returns an HTML page, so it has to be rendered and parsed
        r = session.get(url = url, headers = headers)
        r.html.render()
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波市国资委市属企业招标信息网', r.text)
            return False
        # Note: xpath() returns a list whose items are elements
        data = r.html.xpath('/html/body/div[2]/div[3]/div/div/div[2]/div/div/div/ul/li')
        for item in data:
            title = item.xpath('//a')[0].text
            url = item.xpath('//a')[0].attrs.get('href')
            region = '宁波市属国企'
            publishDate = item.xpath('//p')[0].text
            announcementType = '采购公告'
            self.write_information(title, url, region, publishDate, announcementType)

    # Ningbo intermediary supermarket network
    def CrawlPage_zjcs_nbxzfw(self, page, typeParam):
        # Crawl one page of announcements.
        # typeParam identifies the announcement type.
        session = HTMLSession()
        urllist = ['http://zjcs.nbxzfw.gov.cn/newsweb/api/News/GetList?ClassId=0901&Type='+typeParam['announcementCode']+'&pageIndex='+str(page)+'&pageSize=15','http://zjcs.nbxzfw.gov.cn/newsweb/api/News/GetList?ClassId=0902&Type='+typeParam['announcementCode']+'&pageIndex='+str(page)+'&pageSize=15']
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "Connection": "keep-alive",
            "DNT": '1',
            "Host": "ygcg.nbcqjy.org",
            "Referer": "http://zjcs.nbxzfw.gov.cn/newsweb/page/news/infolist.html?Type="+str(type),
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 HBPC/12.1.3.306"
        }
        for url in urllist:
            r = session.get(url = url, headers = headers)
            if r.status_code != 200:
                if page == 1:
                    gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波中介超市网', r.text)
                return False
            data = json.loads(r.text)['data']
            total = data['total']
            data = data['rows']
            for item in data:
                articleId = item['AutoId']
                BulletinTypeId = item['BulletinTypeId']
                url = 'http://zjcs.nbxzfw.gov.cn/YWGG/Info?id=%s&Type=%s' % (articleId, BulletinTypeId)
                title = item['BulletinTitle']
                region = '宁波中介超市'
                publishDate = item['PublishDate']
                announcementType = typeParam['announcementType']
                self.write_information(title, url, region, publishDate, announcementType)
                #print(publishDate, url)

    # Ningbo sunshine procurement network
    def CrawlPage_ygcg_nbcqjy_org(self, pages, typeParam):
        url = 'https://ygcg.nbcqjy.org/list?type=2&class=%E5%85%AC%E5%91%8A%E5%85%AC%E7%A4%BA&noticeType=' + typeParam['announcementCode']
        wait_for = '.ant-pagination-item-ellipsis'
        page_element = '.anticon-right'
        try:
            r = Splash().post(url, wait_for, pages=pages, page_element=page_element)
        except Exception as e:
            print(e)
        results = json.loads(r.text)
        # Crawl the requested pages.
        if r.status_code != 200:
            if pages == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波阳光采购网, 错误代码:'+str(r.status_code), r.text)
            return False
        for i in range(1, pages + 1):
            data = HTML(html=results[str(i)]).xpath('/html/body/div/div/div[2]/div[2]/div/div/div[2]/div[2]/div[5]/div[1]/div/ul/li')
            if len(data) == 0:
                print('数据为空')
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波阳光采购网, keyerror', '数据为空')
                return False
            for item in data:
                url = 'http://ygcg.nbcqjy.org' + item.xpath('//a')[0].attrs.get('href')
                title = item.xpath('//a/span[3]')[0].text
                region = '宁波阳光采购'
                publishDate = item.xpath('//div[2]')[0].text
                announcementType = typeParam['announcementType']
                print(title)
                self.write_information(title, url, region, publishDate, announcementType)

    # Zhejiang government procurement network
    def CrawlPage_zfcg_czt_zj(self, page, typeParam):
        # Crawl one page of announcements.
        session = HTMLSession()
        url = 'https://zfcg.czt.zj.gov.cn/portal/category'
        if typeParam['announcementCode'] == '110-420383':
            data = {
                "pageNo": page,
                "pageSize": 15,
                "categoryCode": typeParam['announcementCode'],
                "districtCode": ["339900"],
                "isProvince": True,
                "includeGovDistrict": "1",
                "_t": 1699104836000
            }
        else:
            data = {
                "pageNo": page,
                "pageSize": 15,
                "categoryCode": typeParam['announcementCode'],
                "isGov": True,
                "excludeDistrictPrefix": "90",
                "_t": 1699104836000
            }
        headers = {
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
            "content-type": "application/json;charset=UTF-8",
            "sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "x-requested-with": "XMLHttpRequest"
        }
        try:
            r = session.post(url = url, headers = headers, json = data)
        except Exception as e:
            print('10-------------------------', e)
        if r.status_code != 200:
            if page == 1:
                gymailer.SendMail('jinqian_chen@126.com', 'jinqian.chen@srit.com.cn', '爬虫警告信息:宁波政府采购网', r.text)
            return False
        data = json.loads(r.text)['result']['data']
        total = data['total']
        data = data['data']
        for item in data:
            publishDate = datetime.datetime.fromtimestamp(item['publishDate']/1000)
            pageUrl = 'https://zfcg.czt.zj.gov.cn/luban/detail?parentId=600007&articleId=' + item['articleId'] + '&utm=luban.luban-PC-37000.979-pc-websitegroup-zhejiang-secondPage-front.21.320086307d6811ee86314be74945ec2c'
            detailUrl = 'https://zfcg.czt.zj.gov.cn/portal/detail?articleId=' + item['articleId']
            announcementType = typeParam['announcementType']
            if announcementType == '采购意向':
                r = session.get(url = detailUrl, headers = headers)
                detailData = json.loads(r.text)['result']['data']
                if detailData == None:
                    break
                content = HTML(html='<xml>'+detailData['content']+'</xml>')
                region = item['districtName']
                for detailItem in content.xpath('xml/div/div/div[1]/div/table/tbody/tr'):
                    title = detailItem.xpath('//td[2]')[0].text
                    cgxqqk = detailItem.xpath('//td[3]')[0].text
                    ysje = detailItem.xpath('//td[4]')[0].text
                    yjcgsj = detailItem.xpath('//td[5]')[0].text
                    ly = detailData["title"]
                    self.write_information(title, pageUrl, region, publishDate, announcementType)
                    self.write_information_cgyx({'cgxmmc':title,'lj':pageUrl, 'cgxqqk':cgxqqk, 'ysje':ysje, 'yjcgsj':yjcgsj, 'ly':ly})
            else:
                title = item['title']
                region = item['districtName']
                self.write_information(title, pageUrl, region, publishDate, announcementType)
            #print(publishDate, url)
        return True
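
For reference, a minimal sketch of driving the Crawler class by hand. The host, user and password values below are placeholders; main.py and gycrawler.py further down read the real credentials from jdbc.properties.

import pymysql
from crawler import Crawler

# Placeholder connection values; see jdbc.properties / properties.py for the real ones.
connect = pymysql.connect(host='localhost', user='root',
                          password='***', database='guoyantest')
try:
    crawler = Crawler(connect)
    # Crawl a single source and announcement type:
    crawler.CrawlPage_zfcg_czt_zj(1, {"announcementCode": "110-978863",
                                      "announcementType": "采购公告"})
    # Or run the full pass over all configured sites:
    # crawler.Crawl()
finally:
    connect.close()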

137
dbsearch.py

@@ -0,0 +1,137 @@
#!/usr/bin/python3
import pymysql
from properties import Properties
import sys, getopt

class DbSearch:
    # Provides full-text search utilities over the database
    def __init__(self, connect):
        self.connect = connect

    def GetTableList(self, database):
        # List the tables of a database
        cursorTable = self.connect.cursor()
        cursorTable.execute("SELECT table_name FROM INFORMATION_SCHEMA.TABLES where table_schema = '" + database + "'");
        return cursorTable.fetchall()

    def GetColumnList(self, tableName):
        # List the columns of a table (relies on the module-level 'database' name)
        cursorColumn = self.connect.cursor()
        cursorColumn.execute("SELECT column_name,data_type FROM INFORMATION_SCHEMA.COLUMNS where table_schema='" + database + "' AND table_name='" +
                             tableName + "'");
        return cursorColumn.fetchall()

    def SearchTableByColumnName(self, columnName, database):
        # Find tables containing a column whose name includes columnName
        tableList = self.GetTableList(database)
        findList = list()
        for table in tableList:
            columnList = self.GetColumnList(table[0])
            for column in columnList:
                if column[0].find(columnName) != -1:
                    findList.append(table[0])
        return findList

    def SearchTableByText(self, searchText, database):
        # Find tables whose rows contain the string searchText, and print the matching rows
        tableList = self.GetTableList(database)
        if len(tableList) == 0:
            return False
        found = 0
        findList = list()
        for table in tableList:
            strSql = "SELECT '" + table[0] + "' as table_name, t.* "
            strSql = strSql + " FROM " + database + "." + table[0] + " as t where " + "("
            columnList = self.GetColumnList(table[0])
            i = 0
            count = len(columnList)
            for column in columnList:
                # Skip columns whose data type is not textual
                if not column[1] in ('varchar', 'char', 'text'):
                    continue
                i += 1
                if i > 1:
                    strSql += " or "
                strSql += column[0] + " like '%" + searchText + "%' "
            strSql += ")"
            cursorColumn = self.connect.cursor()
            try:
                cursorColumn.execute(strSql)
            except Exception as e:
                print('2----------------------------', database, strSql)
                print("-----错误信息:-----\n", e)
                return False
            result = cursorColumn.fetchall()
            if len(result) > 0:
                findList.append(table[0])
                print("==========================================================================")
                print(table[0], result, strSql)
        return findList

if __name__ == '__main__':
    print(
        """
        ============================================================
        |这是数据库全文检索工具,包含两个参数 |
        ============================================================
        """)
    # Set the runtime environment. If this is the test environment, set is_test to True.
    is_test = False
    if is_test:
        file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyantest'
    else:
        file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
        database = 'guoyan'
    # Read the database settings from jdbc.properties
    props = Properties(file_path)
    host = 'localhost'
    user = props.get('jdbc.username')
    password = props.get('jdbc.password')
    # Open the database connection
    db = pymysql.connect(host = host, user = user, password = password, database = database)
    # Parse the command-line arguments
    keyword = ''
    searchType = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:],"hT:k:",["keyword=","searchType="])
    except getopt.GetoptError:
        print(sys.argv[0] + ' -k <keyword> -T <searchType>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('3--------------------', 'test.py -k <keyword> -T <searchType>')
            sys.exit()
        elif opt in ("-k", "--keyword"):
            keyword = arg
        elif opt in ("-T", "--searchType"):
            searchType = arg
    dbSearch = DbSearch(db)
    if searchType == '0':
        print('正在根据您输入的关键词查找表.....................')
        print('found tables: ', dbSearch.SearchTableByText(keyword, database))
    elif searchType == '1':
        print('正在根据您输入的列名查找表.....................')
        print('found tables: ', dbSearch.SearchTableByColumnName(keyword, database))
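
The script is normally driven from the command line with -k and -T, but DbSearch can also be used as a library. A small sketch (connection values are placeholders); note that GetColumnList reads the module-level database name, so it has to be set when dbsearch is imported rather than run as a script:

import pymysql
import dbsearch

db = pymysql.connect(host='localhost', user='root',
                     password='***', database='guoyantest')
dbsearch.database = 'guoyantest'   # GetColumnList reads this module-level name
searcher = dbsearch.DbSearch(db)
# Tables that have a column whose name contains 'title':
print(searcher.SearchTableByColumnName('title', 'guoyantest'))
# Tables whose text columns contain the string '采购':
print(searcher.SearchTableByText('采购', 'guoyantest'))
db.close()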

59
gycrawler.py

@@ -0,0 +1,59 @@
#!/usr/bin/python3
"""This is the crawler's main program (scheduled version).
Author: Chen Jinqian
Date: 2023/11/03
"""
import pymysql
import datetime
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler

print(
    """采购信息采集器 v1.0
===================================================================================
这个程序用于获取各大招投标网站的采购信息
version: 1.0
作者:陈进钱
日期:2023-11-04
===================================================================================""")
# Set the runtime environment. If this is the test environment, set is_test to True.
is_test = False
if is_test:
    file_path = "/opt/eresource_test/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyantest'
else:
    file_path = "/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties"
    database = 'guoyan'
# Read the database settings from jdbc.properties
props = Properties(file_path)
host = 'localhost'
user = props.get('jdbc.username')
password = props.get('jdbc.password')
# Open the database connection
connect = pymysql.connect(host = host, user = user, password = password, database = database)
# Fetch procurement information and write it into the database
crawler = Crawler(connect)

# Daily check job
def crawl_check_func():
    crawler.Check()

# Crawl job
def crawl_job_func():
    crawler.Crawl()

sched = BlockingScheduler()
sched.add_job(crawl_job_func, 'interval', hours=3, jitter=120, max_instances=4)
sched.add_job(crawl_check_func, 'interval', days=1, jitter=120, max_instances=4)
sched.start()
# Close the database connection (only reached after the scheduler shuts down)
connect.close()

44
gymailer.py

@@ -0,0 +1,44 @@
import smtplib
from email.mime.text import MIMEText
from email.header import Header

def SendMail(sender, receiver, subject, message):
    # SMTP server used for sending
    smtp_server = 'smtp.126.com'
    # SMTP server port (SSL)
    smtp_port = 465
    # Build the mail object
    msg = MIMEText(message, 'plain', 'utf-8')
    msg['From'] = Header(sender, 'utf-8')
    msg['To'] = Header(receiver, 'utf-8')
    msg['Subject'] = Header(subject, 'utf-8')
    # Create the SMTP object
    smtpObj = smtplib.SMTP_SSL(smtp_server, smtp_port)
    # Log in to the SMTP server
    smtpObj.login(sender, 'ERXYFJRLKPTTDXWH')
    # Send the mail
    smtpObj.sendmail(from_addr=sender, to_addrs=[receiver], msg=msg.as_string())
    # Close the SMTP connection
    smtpObj.quit()

if __name__ == '__main__':
    # Sender address
    sender = 'jinqian_chen@126.com'
    # Receiver address
    receiver = 'jinqian.chen@srit.com.cn'
    # Mail subject
    subject = 'Python3发送邮件示例, new'
    # Mail body
    message = '这是一封Python3发送的邮件'
    SendMail(sender, receiver, subject, message)

46
jdbc.properties

@@ -0,0 +1,46 @@
#Db2
#hibernate.dialect=org.hibernate.dialect.DB2Dialect
#jdbc.driverClassName=com.ibm.db2.jcc.DB2Driver
#jdbc.url=jdbc:db2://localhost:50000/eaching
#Oracle
#hibernate.dialect=org.hibernate.dialect.Oracle10gDialect
#jdbc.driverClassName=oracle.jdbc.driver.OracleDriver
#jdbc.url=jdbc:oracle:thin:@47.99.208.214:1521:orcl
#jdbc.url=jdbc:oracle:thin:@118.190.161.36:1521:orcl
#SqlServer
#hibernate.dialect=org.hibernate.dialect.SQLServerDialect
#jdbc.driverClassName=net.sourceforge.jtds.jdbc.Driver
#jdbc.url=jdbc:jtds:sqlserver://localhost:1433/guanwaimatou;SelectMethod=Cursor
#MySql
hibernate.dialect=org.hibernate.dialect.MySQLDialect
jdbc.driverClassName=com.mysql.jdbc.Driver
jdbc.url=jdbc:mysql://116.62.210.190:3306/guoyantest?autoReconnect=true&useUnicode=true&characterEncoding=UTF8&mysqlEncoding=utf8&zeroDateTimeBehavior=convertToNull
jdbc.username=root
jdbc.password=Guoyan83086775
jdbc.maxConn=20
jdbc.minConn=5
jdbc.activeTime=900000
jdbc.alias=eaching
jdbc.keepingSleepTime=30000
jdbc.maxConnectionLifetime=60000
jdbc.multiSource=false
hibernate.cache.use_second_level_cache=true
hibernate.show_sql=false
hibernate.generate_statistics=false
hibernate.cache.provider_class=org.hibernate.cache.EhCacheProvider
#hibernate.cache.provider_class=net.oschina.j2cache.hibernate3.J2CacheProvider
hibernate.cache.use_minimal_puts=true
hibernate.cache.use_structured_entries=true
hibernate.cache.use_query_cache=true
hibernate.use_sql_comments=trues
hibernate.order_updates=true
hibernate.format_sql=false
hbm2ddl.auto=create
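
The crawler scripts (gycrawler.py, main.py, dbsearch.py) read only the jdbc.username and jdbc.password keys from this file, through the Properties class defined in properties.py below. A short sketch of that read path:

from properties import Properties

props = Properties('/opt/eresource/webapp/WEB-INF/classes/prod/jdbc.properties')
user = props.get('jdbc.username')            # -> 'root'
password = props.get('jdbc.password')
max_conn = props.get('jdbc.maxConn', '10')   # default value applies if the key is missing
print(user, max_conn)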

75
main.py

@@ -0,0 +1,75 @@
#!/usr/bin/python3
"""This is the crawler's main program.
Author: Chen Jinqian
Date: 2023/11/03
"""
import pymysql
import datetime
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from properties import Properties
from crawler import Crawler
import sys
import os

print(
    """采购信息采集器 v1.0
===================================================================================
这个程序用于获取各大招投标网站的采购信息
version: 1.0
作者:陈进钱
日期:2023-11-04
===================================================================================""")
# Set the runtime environment. If this is the test environment, set is_test to True.
is_test = True
if is_test:
    root = "/opt/eresource_test/webapp/WEB-INF/classes/prod/"
else:
    root = "/opt/eresource/webapp/WEB-INF/classes/prod/"
if os.path.exists(root):
    file_path = root + "jdbc.properties"
else:
    file_path = "jdbc.properties"
if sys.platform == 'win32':
    host = '116.62.210.190'
    user = 'root'
    password = 'Guoyan83086775'
    if is_test:
        database = 'guoyantest'
    else:
        database = 'guoyan'
else:
    if is_test:
        database = 'guoyantest'
    else:
        database = 'guoyan'
# Read the database settings from jdbc.properties
props = Properties(file_path)
host = '116.62.210.190'
user = props.get('jdbc.username')
password = props.get('jdbc.password')
# Open the database connection
connect = pymysql.connect(host = host, user = user, password = password, database = database)
# Fetch procurement information and write it into the database
crawler = Crawler(connect)
crawler.Crawl()
#crawler.CrawlPage_ygcg_nbcqjy_org(1, {"announcementCode": "21", "announcementType":"采购公告"})
#print(crawler.Check())
# Start the scheduled crawl job
#def crawl_job_func():
#    crawler.Crawl()
#sched = BlockingScheduler()
#sched.add_job(crawl_job_func, 'interval', hours=1, jitter=120)
#sched.start()
# Close the database connection
connect.close()

BIN
myrec.db

Binary file not shown.

72
properties.py

@@ -0,0 +1,72 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import os
import tempfile

class Properties:
    def __init__(self, file_name):
        # If the given file does not exist, fall back to the local file
        if not os.path.exists(file_name):
            file_name = 'jdbc.properties'
        self.file_name = file_name
        self.properties = {}
        try:
            fopen = open(self.file_name, 'r')
            for line in fopen:
                line = line.strip()
                if line.find('=') > 0 and not line.startswith('#'):
                    strs = line.split('=')
                    self.properties[strs[0].strip()] = strs[1].strip()
        except Exception as e:
            raise e
        else:
            fopen.close()

    def has_key(self, key):
        return key in self.properties

    def get(self, key, default_value=''):
        if key in self.properties:
            return self.properties[key]
        return default_value

    def put(self, key, value):
        self.properties[key] = value
        replace_property(self.file_name, key + '=.*', key + '=' + value, True)

def replace_property(file_name, from_regex, to_str, append_on_not_exists=True):
    tmpfile = tempfile.TemporaryFile()
    if os.path.exists(file_name):
        r_open = open(file_name, 'r')
        pattern = re.compile(r'' + from_regex)
        found = None
        for line in r_open:
            if pattern.search(line) and not line.strip().startswith('#'):
                found = True
                line = re.sub(from_regex, to_str, line)
            tmpfile.write(line.encode())
        if not found and append_on_not_exists:
            tmpfile.write(('\n' + to_str).encode())
        r_open.close()
        tmpfile.seek(0)
        content = tmpfile.read()
        if os.path.exists(file_name):
            os.remove(file_name)
        w_open = open(file_name, 'wb')
        w_open.write(content)
        w_open.close()
        tmpfile.close()
    else:
        print("file %s not found" % file_name)

14
splash/SYgcg.py

@@ -0,0 +1,14 @@
#!/usr/bin/python3
from splash.gysplash import SBase
import json

class SYgcg(SBase):
    def open(self):
        return super().open('ygcg', pages=2, annoucement_type='政府采购')

if __name__ == '__main__':
    test = SYgcg()
    r = test.open()
    results = json.loads(r.text)
    print(results)

0
splash/__init__.py

BIN
splash/__pycache__/SYgcg.cpython-310.pyc

Binary file not shown.

BIN
splash/__pycache__/__init__.cpython-310.pyc

Binary file not shown.

BIN
splash/__pycache__/gysplash.cpython-310.pyc

Binary file not shown.

7
splash/config/splash.cnf

@@ -0,0 +1,7 @@
#This is splash config file.
[splash service settings]
server = localhost
port = 8050
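
gysplash.py imports configparser, but in this commit only splash.json is actually read, so the following consumer of splash.cnf is an assumption about the intended use of this file:

import configparser

cfg = configparser.ConfigParser()
cfg.read('splash/config/splash.cnf')
section = cfg['splash service settings']
splash_url = 'http://%s:%s/execute' % (section.get('server', 'localhost'),
                                       section.get('port', '8050'))
print(splash_url)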

20
splash/config/splash.json

@@ -0,0 +1,20 @@
{
    "description": "This is splash config file.",
    "server": "127.0.0.1",
    "port": "8050",
    "class": {
        "SYgcg": {
            "url": "https://ygcg.nbcqjy.org/list?type=2&class=%E5%85%AC%E5%91%8A%E5%85%AC%E7%A4%BA&noticeType={{ $noticeType }}",
            "_comment": "http://www.baidu.com",
            "param": {
                "noticeType": "21"
            },
            "wait_for": ".ant-list-items",
            "page_element": ".anticon-right",
            "headers": {
                "content-type": "application/json",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0"
            }
        }
    }
}
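
The {{ $noticeType }} placeholder in the "url" entry is filled in from the values under "param" by SBase.replace() in splash/gysplash.py; the same mechanism later injects pages, wait_for and the other values into the Lua script. A sketch of just that substitution step, with the regex mirroring SBase.replace():

import json
import re

def fill(source, param, value):
    # Same pattern shape as SBase.replace() in splash/gysplash.py
    return re.sub(r'{{[\s]*\$' + param + r'[\s]*}}', value, source)

with open('splash/config/splash.json', encoding='utf-8') as f:
    cfg = json.load(f)

entry = cfg['class']['SYgcg']
url = entry['url']
for name, value in entry['param'].items():
    url = fill(url, name, value)
print(url)   # ...&noticeType=21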

1
splash/configscripts/main.lua

@@ -0,0 +1 @@
This is lua script file

122
splash/gysplash.py

@@ -0,0 +1,122 @@
#!/usr/bin/python3
'''===================================================================
This module wraps the Splash service so it is easy to use from Python.
Version: 1.0
Author: Chen Jinqian
Date: 2023-12-18
==================================================================='''
import os
import re
import json
import codecs
import configparser
from requests_html import HTMLSession
from requests_html import HTML

config = configparser.ConfigParser()

# Splash base class
class SBase:
    def __init__(self):
        self.__lua_script = ''
        self.config = {}
        # Locate the module directory
        self.root = os.path.dirname(os.path.abspath(__file__))
        # Create the config file automatically if it is missing
        dir = self.root + '/config'
        if not os.path.exists(dir):
            os.makedirs(dir)
        file_path = self.root + '/config/splash.json'
        if os.path.exists(file_path):
            file = codecs.open(file_path, 'r', 'utf-8')
            content = file.read()
            self.config = json.loads(content)
            file.close()
        else:
            self.config['description'] = 'This is splash config file.'
            self.config['server'] = 'localhost'
            self.config['port'] = '8050'
            content = json.dumps(self.config)
            with codecs.open(file_path, 'w', 'utf-8') as file:
                file.write(content)
        # Create an empty script file automatically if it is missing
        dir = self.root + '/scripts'
        if not os.path.exists(dir):
            os.makedirs(dir)
        # This code should be replaced with something more generic
        file_path = dir + '/main.lua'
        if os.path.exists(file_path):
            file = codecs.open(file_path, 'r', 'utf-8')
            self.__lua_script = file.read()
            file.close()
        else:
            with codecs.open(file_path, 'w', 'utf-8') as file:
                self.__lua_script = 'This is lua script file'
                file.write(self.__lua_script)

    def script(self):
        return self.__lua_script

    def class_name(self):
        return type(self).__name__

    def replace(self, source, param, value):
        return re.sub('{{[\s]*\$' + param + '[\s]*}}', value, source)

    # Pass parameter values into the lua script
    def set_params_for_lua(self, scripts, params):
        for param in params:
            scripts = self.replace(scripts, param, params[param])
        return scripts

    '''
    --------------------------------------------------------------------------------------
    This function opens the given URL. The URL and its parameters, the element to wait for,
    the paging element, the headers and so on all default to empty; any value left empty is
    taken from the entry for the calling class in the config file.
    It runs the main lua script, which in turn runs the page-parsing javascript whose file
    name is passed in through the scripts_js parameter.
    --------------------------------------------------------------------------------------
    '''
    def open(self, scripts_js, pages=1, url='', params=None,
             wait_for='', page_element='', headers='', annoucement_type=''):
        if url == '':
            url = self.config['class'][self.class_name()]['url']
        if params == None:
            params = self.config['class'][self.class_name()]['param']
        if len(params) > 0:
            for param in params:
                url = self.replace(url, param, params[param])
        if wait_for == '':
            wait_for = self.config['class'][self.class_name()]['wait_for']
        if page_element == '':
            page_element = self.config['class'][self.class_name()]['page_element']
        if headers == '':
            headers = self.config['class'][self.class_name()]['headers']
        scripts = self.script()
        scripts = self.set_params_for_lua(scripts, {
            'pages': str(pages),
            'url': url,
            'wait_for': wait_for,
            'page_element': page_element,
            # The parser script name is passed through as a parameter
            'scripts_js': scripts_js,
            'announcement_type': annoucement_type
        })
        # print(scripts)
        data = json.dumps({'lua_source': scripts})
        splash_url = 'http://' + self.config['server'] + ':' + self.config['port'] + '/execute'
        r = HTMLSession().post(splash_url, headers=headers, data=data)
        return r

75
splash/scripts/main.lua

@@ -0,0 +1,75 @@
-- This file is the main entry point for page scraping.
-- The parser has to be loaded as a module here; dynamically loading the js file
-- directly does not seem to work otherwise.
parser = require('parser')

function main(splash, args)
    pages = {{$pages}}
    scripts_js = '{{$scripts_js}}'
    page_element = '{{$page_element}}'
    wait_for = '{{$wait_for}}'
    announcement_type = '{{$announcement_type}}'
    splash:go('{{$url}}')
    wait_for_element(splash, wait_for)
    wait_for_element(splash, page_element)
    -- Set the javascript script parameters
    results = {}
    params_js = {}
    params_js['announcement_type'] = announcement_type
    -- Add the first page to the result set
    result = parser.select(splash, scripts_js, params_js)
    table.insert(results, result)
    if pages == 1 then
        return results
    else
        -- Turn the pages:
        -- find the paging element on the page, then send a click() event to it
        for i = 2, pages do
            -- js holds the javascript that fetches the paging element and sends the click event
            js = string.format("document.querySelector('%s').click();", page_element)
            -- Run the paging script
            splash:runjs(js)
            -- Wait for the page to finish loading
            wait_for_element(splash, wait_for)
            wait_for_element(splash, page_element)
            -- A delay seems to be required here, otherwise the page update may not have finished
            assert(splash:wait(5))
            result = parser.select(splash, scripts_js, params_js)
            table.insert(results, result)
        end
        return results
    end
end

function wait_for_element(splash, css, maxwait)
    -- Wait until a selector matches an element
    -- in the page. Return an error if waited more
    -- than maxwait seconds.
    if maxwait == nil then
        maxwait = 10
    end
    return splash:wait_for_resume(string.format([[
        function main(splash) {
            var selector = '%s';
            var maxwait = %s;
            var end = Date.now() + maxwait*1000;
            function check() {
                if(document.querySelector(selector)) {
                    splash.resume('Element found');
                } else if(Date.now() >= end) {
                    var err = 'Timeout waiting for element';
                    splash.error(err + " " + selector);
                } else {
                    setTimeout(check, 200);
                }
            }
            check();
        }
    ]], css, maxwait))
end

2
splash/scripts/modules/jquery-3.7.1.min.js

File diff suppressed because one or more lines are too long

28
splash/scripts/modules/parser.lua

@@ -0,0 +1,28 @@
-- File: parser.lua
-- Defines a module named parser
parser = {}

-- Substitute the {{ $name }} placeholders in the javascript source
function set_params(scripts, params_js)
    for param, value in pairs(params_js) do
        scripts = scripts.gsub(scripts, "{{(%s*)$" .. param .. "(%s*)}}", value)
    end
    --scripts = scripts.gsub('123456 aaaa 123456', "[\s\\\]*aaaa\\\[\\\\s\\\]*", 'bbbb')
    return scripts
end

-- Run the page-parsing javascript inside the rendered page
function parser.select(splash, scripts_js, params_js)
    local file = io.open("/etc/splash/lua_modules/jquery-3.7.1.min.js", "r")
    splash:runjs(file:read('*a'))
    file:close()
    file = assert(io.open("/etc/splash/lua_modules/"..scripts_js..".js", "r"))
    scripts = file:read('*a')
    scripts = set_params(scripts, params_js)
    local js = splash:jsfunc(scripts)
    file:close()
    return js()
end

return parser

32
splash/scripts/modules/ygcg.js

@@ -0,0 +1,32 @@
function () {
    title = '';
    url = '';
    updateTime = '';
    region = '';
    announcementType = '';
    results = {};
    lists = new Array();
    // Grab the head of the announcement list
    ul = $('#app > div > div.z_list_vue > div.ant-spin-nested-loading > div > div > div.z_content > div.z_detail_content > div:nth-child(5) > div.ant-spin-nested-loading > div > ul');
    // Take the first element of the list; if it exists, the wrapped object has length == 1
    li = ul.children('li').first()
    while (li.length == 1)
    {
        // A fresh object per row, so each pushed entry keeps its own values
        item = {}
        a = li.find('div.ant-list-item-meta > div > h4 > span > a');
        item.title = $(a.children()['2']).attr('title');
        item.url = a.attr('href');
        item.updateTime = $(li.children()[1]).text();
        item.region = '宁波阳光采购';
        item.announcementType = '{{$announcement_type}}'
        lists.push(item)
        // Move on to the next list element
        li = li.next()
    }
    results.count = lists.length
    results.lists = lists
    return results
}
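
Splash's /execute endpoint serialises the Lua result table returned by main.lua as JSON, so each page parsed by this script comes back as an object with 'count' and 'lists'. A hypothetical post-processing sketch of what splash/SYgcg.py receives:

import json
from splash.SYgcg import SYgcg

r = SYgcg().open()
pages = json.loads(r.text)          # e.g. {"1": {...}, "2": {...}}, one entry per page
for key, page in pages.items():
    for entry in page.get('lists', []):
        print(entry.get('updateTime'), entry.get('title'), entry.get('url'))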

1650
splash/scripts/modules/zepto.js

File diff suppressed because it is too large

1650
splash/scripts/modules/zepto.js.1

File diff suppressed because it is too large