# 맛집 크롤링 (restaurant crawler)
# =================================
from selenium import webdriver
from bs4 import BeautifulSoup
from tinydb import TinyDB
import time
import pprint as ppr
# =================================
class DBstu:
    """Holds the TinyDB connection and the working table for crawled data."""

    def __init__(self):
        """Open eatList.db and select the "MOM" table."""
        database = TinyDB("eatList.db")
        self.DB = database               # keep the handle for subclasses
        self.user = database.table("MOM")  # table that bsReq results land in
class CWAL(DBstu):
    """Crawl Naver search results for restaurants and store name/price pairs.

    The constructor opens a Chrome driver and immediately runs the whole
    pipeline: naverReq() -> bsReq() -> dataPrintf().
    """

    def __init__(self):
        DBstu.__init__(self)
        # Path to the local chromedriver binary (Selenium 3 positional arg).
        self.driver = webdriver.Chrome("C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe")
        self.html = None       # raw page source of the page being scraped
        self.bsObject = None   # BeautifulSoup parse tree of self.html
        self.titleList = []    # accumulated [{shop name: price}, ...]
        # ============================
        self.naverReq()  # kick off the crawl on construction

    # func (1)
    def naverReq(self):
        """Open naver.com, submit the restaurant search, then hand off to bsReq()."""
        self.driver.get("https://www.naver.com/")
        time.sleep(2)  # let the front page load
        # NOTE(review): find_element_by_* is the Selenium 3 API (removed in
        # Selenium 4.3+); kept here to match the rest of the file.
        self.driver.find_element_by_id("query").send_keys("부천 맛집")
        self.driver.find_element_by_id("search_btn").click()
        time.sleep(2)  # let the result page load
        # ============================
        self.bsReq()  # func call

    # func (2)
    def bsReq(self):
        """Scrape up to 20 result pages, collecting shop names and prices."""
        # next-page button: //*[@id="place_main_ct"]/div/div/div[2]/div[4]/div/a[2]
        page = 1
        while True:
            print("{0:02d}:page 작업 중 ...".format(page))
            self.html = self.driver.page_source
            self.bsObject = BeautifulSoup(self.html, "html.parser")
            # The trailing space in the class name is intentional — it matches
            # the site's markup exactly; do not "fix" it.
            tmp = self.bsObject.find_all("div", {"class": "list_item_inner "})
            for item in tmp:
                tmpDict = {}
                n = item.select_one("div.info_area > div.tit > span.tit_inner > a.name")
                p = item.select("div.info_area > div.etc_area.ellp > span.item")
                r = ''  # price text; stays empty when no second span exists
                # ========================
                try:
                    result = p[1]
                except IndexError:
                    # was a bare `except:`; only a missing second span is expected
                    pass
                else:
                    r = result.text
                finally:
                    time.sleep(3)  # throttle between items
                # ========================
                # Guard: select_one returns None when the name anchor is
                # missing — skip the entry instead of crashing the whole crawl.
                if n is not None:
                    tmpDict[n.attrs['title']] = r
                    self.titleList.append(tmpDict)
            # page next
            self.driver.find_element_by_xpath('//*[@id="place_main_ct"]/div/div/div[2]/div[4]/div/a[2]').click()
            time.sleep(3)
            if page == 20:
                break
            page += 1
        # ============================
        self.dataPrintf()  # func call

    # func (3)
    def dataPrintf(self):
        """Insert every collected {name: price} pair into the MOM table."""
        for entry in self.titleList:
            for k, v in entry.items():
                self.user.insert({"가게이름": k, "가격": v})
                print("가게 이름: {} , 가격: {} ... 적재 성공".format(k, v))
def main():
    """Entry point: construct the crawler, which runs the whole pipeline."""
    CWAL()


if __name__ == "__main__":
    main()