from selenium import webdriver
import requests
from bs4 import BeautifulSoup
import time
import tkinter # GUI PROGRAM
from openpyxl import Workbook
from openpyxl.styles import Color, PatternFill, Font
import pprint as ppr
import os
import sys
import win32api
import subprocess
#-------------------------------
class DataGoKr:
    """Scrape Open API search results from data.go.kr into an Excel workbook.

    Typical call order (as used by ``main``): ``DirectoryListing`` ->
    ``UrlRequests`` (which calls ``UrlParcing`` itself) -> ``XlWrite`` ->
    ``XlFileOpen``.

    Attributes:
        url: Portal entry page that is opened in the browser.
        chromeDriver: Selenium Chrome driver used for navigation.
        html: ``BeautifulSoup`` tree of the last parsed result page, or None.
        searchData: Search keyword; also the stem of the output ``.xlsx`` file.
        workBook / workSheet: openpyxl workbook and its active sheet.
        dataTotal: Mapping of result title -> dict with the keys
            ``meta`` (label -> value), ``desc``, ``type`` and ``page``.
        sigungu: List of Korean province / metropolitan city names.
    """

    # Default chromedriver location used when the caller supplies no path.
    DEFAULT_DRIVER_PATH = "C:\\Users\\junhyeon.kim\\Documents\\chrome_driver\\chromedriver.exe"

    def __init__(self, s, driver_path=None):
        """Create the workbook and start a Chrome session.

        Args:
            s: Search keyword to type into the portal's search box.
            driver_path: Optional path to ``chromedriver``; falls back to
                ``DEFAULT_DRIVER_PATH`` when omitted.
        """
        self.url = "https://www.data.go.kr/"
        self.html = None
        self.searchData = s
        self.workBook = Workbook()  # work-book ( excel )
        self.workSheet = None  # work-sheet ( excel )
        self.dataTotal = dict()
        self.sigungu = ["부산광역시", "서울특별시", "울산광역시", "대전광역시", "강원도", "인천광역시", "경상북도",
                        "광주광역시", "충청북도", "경기도", "전라남도", "대구광역시", "제주특별자치도", "충청남도",
                        "경상남도", "전라북도"]
        # The browser is started last so that a driver start-up failure
        # leaves every other attribute already initialised.
        self.chromeDriver = webdriver.Chrome(driver_path or self.DEFAULT_DRIVER_PATH)

    # Instance method (1)
    def DirectoryListing(self):
        """Abort if the current directory already holds a file named like the keyword.

        File names are compared without their extension. When a match is
        found a message box is shown and the process exits with status 1;
        otherwise a confirmation box announces that the Excel file will be
        created.

        Raises:
            SystemExit: When a file with the same stem already exists.
        """
        existing_stems = {os.path.splitext(name)[0] for name in os.listdir()}
        if self.searchData in existing_stems:
            win32api.MessageBox(0, "exist file", "<디렉토리>")
            # sys.exit() instead of the site-injected exit(), which is not
            # available under ``python -S`` or in frozen executables.
            sys.exit(1)
        win32api.MessageBox(0, "확인을 누르시면 엑셀 파일을 생성하겠습니다.", "<디렉토리>")

    # Instance method (2)
    def UrlRequests(self):
        """Open the portal, submit the search keyword and parse the Open API tab.

        Raises:
            AssertionError: When the loaded page title does not contain the
                expected portal name.
        """
        # Imported locally so this method does not rely on a new file-level import.
        from selenium.webdriver.common.by import By

        self.chromeDriver.get(self.url)
        # Explicit raise rather than ``assert`` so the check survives ``python -O``.
        if "공공데이터포털" not in self.chromeDriver.title:
            raise AssertionError("unexpected page title: {!r}".format(self.chromeDriver.title))
        time.sleep(3)  # wait 3 seconds
        self.chromeDriver.fullscreen_window()
        # Type the search keyword.
        self.chromeDriver.find_element(By.NAME, "query").send_keys(self.searchData)
        # Press the search button.
        self.chromeDriver.find_element(By.XPATH, '//*[@id="home-search-form"]/button/i').click()
        time.sleep(2)  # wait 2 seconds
        # Switch to the Open API tab.
        self.chromeDriver.find_element(By.XPATH, '//*[@id="openapiTab"]/a/span').click()
        self.UrlParcing()

    @staticmethod
    def _split_meta(text):
        """Split a ``"label: value"`` string at the first colon only.

        Returns:
            ``(label, value)``, both stripped. ``value`` is ``""`` when the
            text contains no colon.
        """
        label, _, value = str(text).partition(":")
        return label.strip(), value.strip()

    @staticmethod
    def _node_text(node):
        """Return the stripped text of a parsed node, or ``""`` when it is None."""
        return "" if node is None else str(node.text).strip()

    # Instance method (3)
    def UrlParcing(self):
        """Parse the current result page into ``self.dataTotal``.

        Every ``div.data-item`` becomes one entry keyed by its title. Items
        without a title link are skipped because the title is the dict key.
        ``page`` holds the item's 1-based position in the result list.
        """
        time.sleep(2)  # wait 2 seconds
        self.html = BeautifulSoup(self.chromeDriver.page_source, "html.parser")
        items = self.html.find_all("div", {"class": "data-item"})
        for position, item in enumerate(items, start=1):
            title = self._node_text(item.select_one("div.data-title > a"))
            if not title:
                continue

            # The three meta spans: modified date, organisation, service type.
            meta = {}
            for nth in (1, 2, 3):
                span = item.select_one("div.data-meta > span:nth-of-type({})".format(nth))
                if span is None:
                    continue
                label, value = self._split_meta(span.text)
                meta[label] = value

            # Not every item carries an XML type badge.
            xml_badge = item.select_one("div.data-types > span.data-type.XML")
            self.dataTotal[title] = {
                "meta": meta,
                "desc": self._node_text(item.select_one("div.data-desc")),
                "type": "" if xml_badge is None else str(xml_badge.string),
                "page": position,
            }
        # Dump the collected data once, instead of re-printing the growing
        # dict for every item.
        ppr.pprint(self.dataTotal)

    # Instance method (4)
    def XlWrite(self):
        """Write ``self.dataTotal`` to ``<searchData>.xlsx``.

        Column layout (the header is on row 2, data starts on row 3):
            B title, C modified date, D organisation, E service type,
            F description, G data type, H page.
        """
        columns = ("B", "C", "D", "E", "F", "G", "H")
        headers = ("타이틀", "수정일", "기관", "서비스 유형", "속성", "데이터 타입", "페이지")
        widths = {"A": 0.47, "B": 48.5, "C": 9.5, "D": 28.9,
                  "E": 10.4, "F": 64.3, "G": 10.4, "H": 8.1}
        header_row = 2

        self.workSheet = self.workBook.active
        header_fill = PatternFill(start_color="F2DCDB", end_color="F2DCDB", fill_type="solid")

        for column, width in widths.items():
            self.workSheet.column_dimensions[column].width = width

        # Header row: each cell is written and filled exactly once.
        for column, header in zip(columns, headers):
            cell = self.workSheet[column + str(header_row)]
            cell.value = header
            cell.fill = header_fill

        for row, (title, record) in enumerate(self.dataTotal.items(), start=header_row + 1):
            # Labels are looked up with whitespace removed so that
            # "서비스 유형" and "서비스유형" both match; a label that is
            # missing leaves the cell empty instead of aborting the export.
            meta = {"".join(label.split()): value for label, value in record["meta"].items()}
            values = (
                title,
                meta.get("수정일"),
                meta.get("기관"),
                meta.get("서비스유형"),
                record["desc"],
                record["type"],
                record["page"],
            )
            for column, value in zip(columns, values):
                self.workSheet[column + str(row)] = value

        self.workBook.save(self.searchData + ".xlsx")

    # Open the generated file.
    def XlFileOpen(self):
        """Open ``<searchData>.xlsx`` through the system shell."""
        subprocess.call(self.searchData + ".xlsx", shell=True)

    # Destructor
    def __del__(self):
        """Release the workbook and the browser session.

        Uses ``getattr`` because ``__init__`` may have failed before the
        attributes were assigned.
        """
        work_book = getattr(self, "workBook", None)
        if work_book is not None:
            work_book.close()
        driver = getattr(self, "chromeDriver", None)
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                # Best-effort cleanup: the session may already be gone or
                # the interpreter may be shutting down.
                pass
def main():
    """Run the scraping pipeline for the fixed keyword "공공데이터".

    Steps, in order: check for an existing output file, search the portal
    and parse the results, write the workbook, then open it.
    """
    scraper = DataGoKr("공공데이터")  # create the scraper object
    pipeline = (
        scraper.DirectoryListing,
        scraper.UrlRequests,
        scraper.XlWrite,
        scraper.XlFileOpen,
    )
    for step in pipeline:
        step()
if __name__ == "__main__":
    # Working directory in which the Excel file is created.
    WORK_DIR = "C:\\Users\\junhyeon.kim\\Desktop\\doc"
    try:
        os.chdir(WORK_DIR)
    except OSError:
        # The directory is not usable yet: try to create it. Only OSError is
        # handled so that KeyboardInterrupt / SystemExit are not swallowed.
        try:
            os.mkdir(WORK_DIR)
        except OSError:
            sys.exit(1)
        os.chdir(WORK_DIR)
    # Show the directory that was finally selected (both paths end up here).
    win32api.MessageBox(0, "{}".format(os.path.abspath(os.getcwd())), "<현재 디렉토리>")
    main()