프로젝트 코드 일부분
from bs4 import BeautifulSoup
from selenium import webdriver
from urllib.parse import urlparse
import requests as req
from yaml import load
import p01
class Cllct:
    """Scrape a movie ranking page and pass the records to Elasticsearch.

    The target site is read from ``./CONFIG/info.yaml`` (keys ``url`` and
    ``path``). ``requestURL`` scrapes the listing page, ``SubInfo`` scrapes
    each movie's detail page, and the ``Elastic*`` methods delegate to
    ``p01.Elastic``.
    """

    def __init__(self):
        # NOTE(review): this stores the p01.Elastic class itself, not an
        # instance, so the delegating methods below call it without an
        # instance. Presumably p01.Elastic exposes class/static methods --
        # confirm against p01.
        self.elastic = p01.Elastic
        self.url = None      # base URL, set by urlSetting()
        self.path = None     # listing page path, set by urlSetting()
        self.bsObj = None    # parsed listing page, set by requestURL()
        self.element = []    # scraped movie records, appended by requestURL()

    # elasticsearch (1) server connect
    def ElasticSrvConnect(self):
        """Delegate to ``p01.Elastic.ElasticSrvConnect``."""
        self.elastic.ElasticSrvConnect()

    # elasticsearch (2) health check
    def ElasticsHealthCheck(self):
        """Delegate to ``p01.Elastic.ElasticsHealthCheck``."""
        self.elastic.ElasticsHealthCheck()

    # elasticsearch (3) data insert
    def ElasticsInsertDocument(self):
        """Pass every collected record to ``p01.Elastic.InsertDocument``."""
        self.elastic.InsertDocument(x=self.element)

    # Instance method (1)
    def urlSetting(self):
        """Load ``url`` and ``path`` from ``./CONFIG/info.yaml``.

        Raises:
            OSError: if the config file cannot be opened.
            KeyError: if ``url`` or ``path`` is missing from the config.
        """
        # Function-scope import: yaml.load() without an explicit Loader is
        # rejected by PyYAML >= 6 and can construct arbitrary objects on
        # older versions; safe_load only builds plain Python data.
        from yaml import safe_load

        # The with-statement closes the file; no explicit close() is needed.
        with open("./CONFIG/info.yaml", "r") as f:
            txt = safe_load(f)
        self.url = txt["url"]
        self.path = txt["path"]

    # Instance method (2)
    def requestURL(self):
        """Scrape the listing page and append one record per movie.

        Does nothing when the listing page does not answer with HTTP 200.
        Each record is a dict with the keys ``name``, ``numb`` (1-based
        position in the listing), ``showtime``, ``showday`` and ``nation``;
        it is printed and appended to ``self.element``.
        """
        html = req.get(url=self.url + self.path)
        if html.status_code != 200:
            return

        self.bsObj = BeautifulSoup(html.text, "html.parser")
        mvLst = self.bsObj.find_all("div", {"class": "tit3"})
        for indx, vale in enumerate(mvLst):
            # Fetch the detail page first; it supplies the last three fields.
            showt, showd, nation = self.SubInfo(vale.a.attrs["href"])
            insertData = {
                "name": vale.a.attrs["title"],
                "numb": indx + 1,
                "showtime": showt,
                "showday": showd,
                "nation": nation,
            }
            Result = "영화 이름 : {n}, 영화 순위 : {o}, 영화 상영시간 : {t}, 영화 상영날짜 : {d}, 제작 국가 : {s}".format(
                n=insertData["name"],
                o=insertData["numb"],
                t=insertData["showtime"],
                d=insertData["showday"],
                s=insertData["nation"],
            )
            print(Result)
            self.element.append(insertData)

    @staticmethod
    def _open_date_from_href(href):
        """Return the text between the first and second ``=`` of *href*'s query.

        Returns ``None`` when the query string contains no ``=``, instead of
        raising ``IndexError``.
        """
        parts = str(urlparse(href).query).split("=")
        return parts[1] if len(parts) > 1 else None

    def SubInfo(self, subpath):
        """Scrape one movie detail page.

        Args:
            subpath: path of the detail page, appended to ``self.url``.

        Returns:
            A ``(showtime, showday, nation)`` tuple. Every call returns a
            3-tuple so callers can always unpack it; a field that could not
            be extracted is ``None``. Extraction stops at the first missing
            field in the order nation -> showtime -> showday.
        """
        nation = None    # production country
        showtime = None  # running time
        showday = None   # release date

        html = req.get(self.url + subpath)
        if html.status_code != 200:
            # Keep the return shape stable even when the page is unavailable.
            return showtime, showday, nation

        bsObject = BeautifulSoup(html.text, "html.parser")
        mvInfo = bsObject.select_one("div.mv_info > dl.info_spec > dd > p")

        try:
            # select_one() returns None when nothing matches (and mvInfo
            # itself may be None), so a missing node shows up as
            # AttributeError on the attribute access.
            nation = mvInfo.select_one("span:nth-of-type(2) > a").string
            showtime = mvInfo.select_one("span:nth-of-type(3)").string
        except AttributeError:
            return showtime, showday, nation

        # The date link is looked up in the 4th <span> first, then in the 3rd.
        date_selectors = (
            "span:nth-of-type(4) > a:nth-of-type(2)",
            "span:nth-of-type(3) > a:nth-of-type(2)",
        )
        for selector in date_selectors:
            try:
                href = mvInfo.select_one(selector).attrs["href"]
            except (AttributeError, KeyError):
                continue
            showday = self._open_date_from_href(href)
            break

        # return order: running time, release date, country
        return showtime, showday, nation
def main():
    """Run the pipeline: connect, load config, scrape, then insert."""
    collector = Cllct()  # create the collector object

    collector.ElasticSrvConnect()
    # The cluster health check (ElasticsHealthCheck) is not run here.

    # ------------------------------
    collector.urlSetting()
    collector.requestURL()
    collector.ElasticsInsertDocument()
# Script entry point: run the pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()
'언어 > python' 카테고리의 다른 글
pysimplegui (0) | 2019.02.10 |
---|---|
python + crawling + elasticsearch (0) | 2019.02.04 |
프로젝트 디렉토리 (0) | 2019.01.13 |
project 일부 코드 (0) | 2019.01.08 |
Project 일부분 (0) | 2019.01.05 |