# Carabao Cup crawler in Python
import pprint as ppr
import re
import time

import openpyxl
import requests
from bs4 import BeautifulSoup
from openpyxl.styles import Font, Alignment, PatternFill, Color
from selenium import webdriver
from selenium.webdriver.common.by import By
#===============================
class VISUAL:
    """Excel-side state: the workbook plus the layout of the result table.

    The table occupies columns B through I; ``excelIndex``, ``FristIndex``
    and ``IndexWidth`` are parallel lists (column letter, header caption,
    column width) with one entry per column.
    """

    def __init__(self):
        # Workbook that will receive the generated sheets.
        self.wb = openpyxl.Workbook()
        # Worksheet currently being written; nothing is selected yet.
        self.ws = None

        # Column letters 'B'..'I' (inclusive).
        first_col, last_col = ord('B'), ord('I')
        self.excelIndex = list(map(chr, range(first_col, last_col + 1)))

        # Header caption of each column, in column order.
        self.FristIndex = [
            '날짜',
            '경기시작_시간',
            '팀L',
            '점수',
            '-',
            '점수',
            '팀R',
            '경기장',
        ]

        # Width applied to each column, in column order.
        self.IndexWidth = [8.7, 13.2, 25, 8.1, 1.5, 8.1, 25, 17.2]
class SELEN:
    """Browser-side state: starts and holds the Selenium Chrome driver."""

    # chromedriver location used when the caller does not supply one; this is
    # the path the script was originally hard-wired to.
    DEFAULT_DRIVER_PATH = "C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe"

    def __init__(self, path=DEFAULT_DRIVER_PATH):
        """Launch Chrome through the chromedriver executable at ``path``.

        Args:
            path: Filesystem path of the chromedriver executable.  Defaults
                to ``DEFAULT_DRIVER_PATH`` so existing no-argument callers
                behave exactly as before.
        """
        self.path = path
        # NOTE(review): passing the driver path positionally is the Selenium 3
        # calling convention; confirm it against the Selenium version in use.
        self.chrome_driver = webdriver.Chrome(self.path)
class STU(SELEN, VISUAL):
    """Scrape Carabao Cup results from a Naver search and export them to Excel.

    Typical use: ``urlReguests`` opens Naver and submits the search,
    ``htmlURL`` visits each round tab and stores one record per match in
    ``self.information``, and ``visualDo`` writes one worksheet per round.
    The workbook is saved to ``CarabaoCup.xlsx`` by ``__del__``.
    """

    # Tab link of a round inside the search-result widget; ``{}`` is the
    # 1-based position of the tab (1 = round of 32, 2 = round of 16).
    _ROUND_TAB_XPATH = (
        '//*[@id="_calLayerBaseSportsDbSearch"]/div[2]/div/div[3]/ul/li[{}]/a'
    )
    # Eight-digit date embedded in the row class names listed in ``day_list``.
    _DATE_RE = re.compile(r"\d{8}")
    # Worksheet row that holds the column captions; data starts one row below.
    _HEADER_ROW = 6

    def __init__(self):
        # Both bases are initialised explicitly, workbook first, browser second.
        VISUAL.__init__(self)
        SELEN.__init__(self)

        self.target_url = "https://www.naver.com/"
        # Round name -> CSS class values of the <tr> rows belonging to it.
        self.day_list = {"32강": ['schedule_20180926',
                                 'schedule_20180927',
                                 'schedule_20181003 last'],
                         "16강": ['schedule_20181101']}
        self.html = None     # page source captured most recently by htmlURL
        self.bs_obj = None   # BeautifulSoup tree built from self.html
        # Round name -> list of match records (see _parse_match_row).
        self.information = {
            "32강": [],
            "16강": []
        }

    def urlReguests(self):
        """Open ``target_url`` and submit a search for '카라바오컵'.

        Uses fixed sleeps (2 s each) after loading the page and after
        clicking the search button.
        """
        driver = self.chrome_driver
        driver.get(self.target_url)
        time.sleep(2)

        # Type the query into the search box and press the search button.
        # find_element(By...) replaces the find_element_by_* helpers, which
        # newer Selenium releases no longer provide.
        driver.find_element(By.ID, 'query').send_keys('카라바오컵')
        driver.find_element(By.XPATH, '//*[@id="search_btn"]/span[2]').click()
        time.sleep(2)

    def htmlURL(self):
        """Visit each round tab and collect its matches into ``information``.

        For every round the tab is clicked, the page source is captured into
        ``self.html`` / ``self.bs_obj``, and each ``<tr>`` whose class equals
        an entry of ``day_list[round]`` is converted into a match record.

        Raises:
            AttributeError, IndexError: if a row lacks one of the expected
                cells (no guard is applied to the scraped markup).
        """
        round_tabs = {
            "32강": 1,
            "16강": 2
        }
        for key, tab_position in round_tabs.items():
            # Switch to the tab of this round and give it a moment to render.
            tab_xpath = self._ROUND_TAB_XPATH.format(tab_position)
            self.chrome_driver.find_element(By.XPATH, tab_xpath).click()
            time.sleep(1)

            # From here on only the captured string is parsed, so no further
            # waiting is needed.
            self.html = self.chrome_driver.page_source
            self.bs_obj = BeautifulSoup(self.html, "html.parser")

            for row_class in self.day_list[key]:
                print(self._DATE_RE.search(row_class).group(), " : 작업 중 ...")
                for row in self.bs_obj.find_all("tr", {"class": row_class}):
                    self.information[key].append(self._parse_match_row(row))

    @staticmethod
    def _parse_match_row(row):
        """Build the match record for one schedule ``<tr>`` element."""
        left_name = row.select(
            "td.score.mgn.tp4 > a.match > em.team_lft.team_lft2")[0].attrs['title']
        right_name = row.select(
            "td.score.mgn.tp4 > a.match > em.team_rgt.team_rgt2")[0].attrs['title']
        return {
            "날짜": row.find("td", {'class': 'date tp4'}).string,
            "경기 시작 시간": row.find("td", {'class': 'time tp4'}).string,
            "경기 결과": {
                'LEFT': {
                    'Name': left_name,
                    'Score': row.select("span.score_lft")[0].text},
                'RIGH': {
                    'Name': right_name,
                    'Score': row.select("span.score_rgt")[0].text}},
            "경기장": row.select("p.elps > a")[0].attrs['title']}

    @staticmethod
    def _score_value(text):
        """Return a scraped score as an int, or None when it is not numeric."""
        try:
            return int(str(text).strip())
        except ValueError:
            return None

    def visualDo(self):
        """Write the collected matches into the workbook, one sheet per round.

        Each sheet is named ``CarabaoCup_<round>``, carries the round name in
        B4, the column captions in row 6 and one match per row from row 7.
        The higher score of a match is shown in the highlight colour.
        """
        centered = Alignment(horizontal='center', vertical='center')
        right_aligned = Alignment(horizontal='right', vertical='center')
        team_font = Font(color='87a8ee')
        score_font = Font(bold=True)
        winner_font = Font(color="ca054d", bold=True)
        score_fill = PatternFill(patternType='solid', fgColor=Color('d8d8d5'))

        for k, matches in self.information.items():
            self.ws = self.wb.create_sheet("CarabaoCup_" + k)
            ws = self.ws
            ws['B4'] = k

            # Caption row, which also fixes the column widths.
            header_row = str(self._HEADER_ROW)
            for column, caption, width in zip(self.excelIndex,
                                              self.FristIndex,
                                              self.IndexWidth):
                ws[column + header_row] = caption
                ws.column_dimensions[column].width = width

            for row_no, element in enumerate(matches, self._HEADER_ROW + 1):
                (date_cell, time_cell, left_name, left_score, separator,
                 right_score, right_name, stadium) = [
                    ws[column + str(row_no)] for column in self.excelIndex[:8]]
                result = element['경기 결과']

                # Scores arrive as text; compare them numerically so that,
                # for example, 10 beats 9.  Non-numeric scores highlight
                # neither side.
                left_goals = self._score_value(result['LEFT']['Score'])
                right_goals = self._score_value(result['RIGH']['Score'])
                comparable = left_goals is not None and right_goals is not None
                left_won = comparable and left_goals > right_goals
                right_won = comparable and left_goals < right_goals

                date_cell.value = element['날짜']

                time_cell.value = element['경기 시작 시간']
                time_cell.alignment = centered

                left_name.value = result['LEFT']['Name']
                left_name.font = team_font
                left_name.alignment = right_aligned

                left_score.value = str(result['LEFT']['Score'])
                left_score.alignment = right_aligned
                left_score.font = winner_font if left_won else score_font
                left_score.fill = score_fill

                separator.value = ":"

                right_score.value = str(result['RIGH']['Score'])
                right_score.font = winner_font if right_won else score_font
                right_score.fill = score_fill

                right_name.value = result['RIGH']['Name']
                right_name.font = team_font

                stadium.value = element['경기장']

    def __del__(self):
        """Save the workbook as ``CarabaoCup.xlsx`` when the object is released.

        NOTE: Python does not guarantee when (or whether) a finalizer runs,
        so the save happens at an unspecified point after the last reference
        to the instance is dropped.
        """
        workbook = getattr(self, "wb", None)
        if workbook is None:
            # Construction failed before the workbook existed; nothing to save.
            return
        workbook.save("CarabaoCup.xlsx")
        print("excel 저장 성공")
def main():
    """Run the crawl end to end: search, scrape the rounds, fill the workbook.

    The Excel file itself is written by ``STU.__del__`` once ``stuNode`` is
    released after this function returns.
    """
    stuNode = STU()  # creates the workbook and starts Chrome
    try:
        stuNode.urlReguests()
        stuNode.htmlURL()
        stuNode.visualDo()
    finally:
        # Always shut the browser down, even when a step above fails;
        # otherwise the Chrome and chromedriver processes are left running.
        stuNode.chrome_driver.quit()


if __name__ == "__main__":
    main()