2019 - 02 - 04 산출물

---------------------------

from elasticsearch import Elasticsearch

from json import loads, load

from urllib.parse import urlencode

import requests

from bs4 import BeautifulSoup

import re

from semi_proj.elasticSearchNode import Ela

class NMV:
    """Scrapes the Naver Movie ranking pages and indexes results via Ela.

    Workflow: paramSetting() loads url/path/query parameters from
    ./info.json, urlRequests() walks ranking pages 1-3, follows each
    movie link and collects name/genre/nation/runtime/release-day dicts
    into self.element, then bulk-inserts them with Ela.dataInsert().
    """

    def __init__(self):
        self.__url = None    # base url, e.g. https://movie.naver.com
        self.__path = None   # ranking path, e.g. /movie/sdb/rank/rmovie.nhn
        # query-string parameters for the ranking page
        self.__param = {"sel": None, "page": None, "date": None, "tg": None}
        self.__bsobj = None  # most recent BeautifulSoup parse tree
        self.element = []    # accumulated per-movie result dicts

    def paramSetting(self):
        """Load url/path/sel/date/tg from ./info.json into the instance."""
        # json.load(f) replaces loads(f.read()); the explicit f.close()
        # was redundant inside the with-block and has been dropped.
        with open("./info.json", "r", encoding="utf-8") as f:
            json_doc = load(f)
        print(json_doc)
        self.__url = json_doc["url"]
        self.__path = json_doc["path"]
        self.__param["sel"] = json_doc["sel"]
        self.__param["date"] = json_doc["date"]
        self.__param["tg"] = json_doc["tg"]

    def indexCreate(self):
        """Create the Elasticsearch index (delegates to Ela)."""
        Ela.createIndex()

    def urlRequests(self):
        """Crawl ranking pages 1-3, then each listed movie's detail page."""
        for page in range(1, 4):
            self.__param["page"] = page
            param = urlencode(self.__param)
            # e.g. https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=20190202&tg=18&page=1
            requ = self.__url + self.__path + "?" + param
            html = requests.get(requ)
            if html.status_code != 200:
                continue
            self.__bsobj = BeautifulSoup(html.text, "html.parser")
            a_list = [a.attrs["href"] for a in self.__bsobj.select("div.tit5 > a")]
            for href in a_list:
                # e.g. https://movie.naver.com/movie/bi/mi/basic.nhn?code=10200
                sub_url = self.__url + href
                html = requests.get(sub_url)
                if html.status_code != 200:
                    continue
                self.__bsobj = BeautifulSoup(html.text, "html.parser")
                info = self.__bsobj.select_one("div.mv_info")
                try:
                    movie_name1 = info.select_one("h3.h_movie > a").string
                except AttributeError as e:
                    # detail page without the expected layout: skip movie
                    print(e)
                    continue
                movie_name2 = re.sub("[\n\r\t]", "", str(info.select_one("strong.h_movie2").string))
                # summary block: span 1 genre / 2 nation / 3 runtime / 4 release day
                tmp_summary = self.__bsobj.select_one("#content > div.article > div.mv_info_area > div.mv_info > dl > dd > p")
                tmp_dir = {
                    "movie_name1": movie_name1,
                    "movie_name2": movie_name2,
                    "jangr": None,
                    "nation": None,
                    "minute": None,
                }
                for span_no in range(1, 5):
                    # NOTE: loop variables no longer shadow the outer `i`
                    tmp = tmp_summary.select_one("span:nth-of-type({})".format(span_no))
                    if span_no != 3:
                        anchors = tmp.select("a")
                        # BUGFIX: the original applied .string twice to the
                        # same node; extract each anchor's text exactly once.
                        tmp_list = [re.sub("[\n\r\t ]", "", a.string) for a in anchors]
                        if span_no == 1:    # genre
                            tmp_dir["jangr"] = tmp_list
                        elif span_no == 2:  # production country
                            tmp_dir["nation"] = tmp_list
                        elif span_no == 4:  # release day: join year with month-day pairs
                            tmp_dir["day"] = [tmp_list[k] + tmp_list[k + 1]
                                              for k in range(0, len(tmp_list), 2)]
                    else:  # span 3: running time (no <a> children)
                        tmp_dir["minute"] = [re.sub("[\n\r\t ]", "", x.string) for x in tmp]
                print(tmp_dir)
                self.element.append(tmp_dir)
        Ela.dataInsert(self.element)


class T(NMV):

    """Thin subclass of NMV used as the driver below; adds no behavior."""

    pass


# Driver: instantiate the crawler, load config, then crawl and index.
t = T()

# t.indexCreate()  # one-off: create the ES index before the first run

t.paramSetting()

t.urlRequests()




-------------------------------

json 파일


{

  "url": "https://movie.naver.com",

  "path": "/movie/sdb/rank/rmovie.nhn",

  "sel": "pnt",

  "date":"20190202",

  "tg":"18"

}


------------------------------------------

elasticsearch


from elasticsearch import Elasticsearch

import json

class Ela:
    """Thin wrapper around an Elasticsearch node for the movie-ranking index."""

    # NOTE(review): host/port are hard-coded; verify the node address.
    es = Elasticsearch(hosts="192.168.240.129", port=9200)

    @classmethod
    def createIndex(cls):
        """Create the 'today19020402' index with 5 primary shards.

        Raises if the index already exists (no exists-check, matching
        the original behavior).
        """
        cls.es.indices.create(
            index="today19020402",
            body={
                "settings": {
                    "number_of_shards": 5
                }
            }
        )

    @classmethod
    def indexExists(cls, paramindex):
        """Print and return whether index *paramindex* exists.

        BUGFIX: the local variable used to shadow the builtin ``bool``;
        the result is now also returned so callers can branch on it
        (previously the method always returned None).
        """
        exists = cls.es.indices.exists(index=paramindex)
        print(exists)
        return exists

    @classmethod
    def indexCount(cls):
        """Print the document count of the 'today19020402' index."""
        print(cls.es.count(index="today19020402"))

    @classmethod
    def dataInsert(cls, element):
        """Index each dict in *element* under sequential ids starting at 1."""
        for n, doc in enumerate(element):
            # doc_type is deprecated in ES 7+; kept for compatibility with
            # the cluster this was written against.
            res = cls.es.index(index="today19020402", doc_type="today", id=n + 1, body=doc)
            print(res)


# Ad-hoc smoke checks against the cluster.
# NOTE(review): "today190204012" differs from the index created above
# ("today19020402") — looks like a typo; confirm which index is intended.
Ela.indexExists("today190204012")

Ela.indexCount()


'언어 > python' 카테고리의 다른 글

python + 지하철 + 이미지  (0) 2019.02.24
pysimplegui  (0) 2019.02.10
프로젝트 코드 일부분  (0) 2019.01.20
프로젝트 디렉토리  (0) 2019.01.13
project 일부 코드  (0) 2019.01.08

from bs4 import BeautifulSoup

from selenium import webdriver

from urllib.parse import urlparse

import requests as req

from yaml import load

import p01

class Cllct:
    """Crawls the Naver Movie ranking and stores rows via p01.Elastic.

    requestURL() reads the ranking list, SubInfo() follows each movie's
    detail page for runtime / release date / country, and the collected
    dicts are pushed to Elasticsearch via ElasticsInsertDocument().
    """

    def __init__(self):
        self.elastic = p01.Elastic  # elasticsearch facade (project module)
        self.url = None             # base url from CONFIG/info.yaml
        self.path = None            # ranking path from CONFIG/info.yaml
        self.bsObj = None           # last parsed ranking page
        self.element = []           # collected movie dicts

    # elasticsearch (1)  server connect
    def ElasticSrvConnect(self):
        self.elastic.ElasticSrvConnect()

    # elasticsearch (2)  health check
    def ElasticsHealthCheck(self):
        self.elastic.ElasticsHealthCheck()

    # elasticsearch (3)  insert the collected rows
    def ElasticsInsertDocument(self):
        self.elastic.InsertDocument(x=self.element)

    # Instance method (1): read url/path from the YAML config
    def urlSetting(self):
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # and unsafe on untrusted input — prefer yaml.safe_load.
        with open("./CONFIG/info.yaml", "r") as f:
            cfg = load(f.read())
        self.url = cfg["url"]
        self.path = cfg["path"]

    # Instance method (2): crawl the ranking page
    def requestURL(self):
        html = req.get(url=self.url + self.path)
        if html.status_code != 200:
            return
        self.bsObj = BeautifulSoup(html.text, "html.parser")
        mvLst = self.bsObj.find_all("div", {"class": "tit3"})
        for indx, vale in enumerate(mvLst):
            showt, showd, nation = self.SubInfo(vale.a.attrs["href"])  # detail-page lookup
            insertData = {
                "name": vale.a.attrs["title"],
                "numb": indx + 1,
                "showtime": showt,
                "showday": showd,
                "nation": nation,
            }
            Result = "영화 이름 : {n}, 영화 순위 : {o}, 영화 상영시간 : {t}, 영화 상영날짜 : {d}, 제작 국가 : {s}".\
                format(n=insertData["name"], o=insertData["numb"], t=insertData["showtime"],
                       d=insertData["showday"], s=insertData["nation"])
            print(Result)
            self.element.append(insertData)

    def SubInfo(self, subpath):
        """Fetch a movie detail page; return (showtime, showday, nation).

        BUGFIX: the original returned bare ``None`` (not a 3-tuple) when
        the HTTP status was not 200, crashing the tuple-unpack in
        requestURL(). Each field stays None when its span cannot be
        parsed; the bare excepts were narrowed to the errors the lookups
        actually raise.
        """
        nation = None    # production country
        showtime = None  # running time
        showday = None   # release date (from the link's query string)
        html = req.get(self.url + subpath)
        if html.status_code == 200:
            bsObject = BeautifulSoup(html.text, "html.parser")
            mvInfo = bsObject.select_one("div.mv_info > dl.info_spec > dd > p")
            try:
                # country, then running time — on failure keep what we have
                nation = mvInfo.select_one("span:nth-of-type(2) > a").string
                showtime = mvInfo.select_one("span:nth-of-type(3)").string
            except (AttributeError, KeyError):
                return showtime, showday, nation
            # release date: usually span 4, sometimes span 3
            for selector in ("span:nth-of-type(4) > a:nth-of-type(2)",
                             "span:nth-of-type(3) > a:nth-of-type(2)"):
                try:
                    href = mvInfo.select_one(selector).attrs["href"]
                except (AttributeError, KeyError):
                    continue
                showday = str(urlparse(href).query).split("=")[1]
                break
        # return order: showtime, showday, nation
        return showtime, showday, nation


def main():
    """Wire up the collector: connect, configure, crawl, then index."""
    collector = Cllct()
    collector.ElasticSrvConnect()
    # collector.ElasticsHealthCheck()  # optional cluster health probe
    collector.urlSetting()
    collector.requestURL()
    collector.ElasticsInsertDocument()


if __name__ == "__main__":
    main()


'언어 > python' 카테고리의 다른 글

pysimplegui  (0) 2019.02.10
python + crawling + elasticsearch  (0) 2019.02.04
프로젝트 디렉토리  (0) 2019.01.13
project 일부 코드  (0) 2019.01.08
Project 일부분  (0) 2019.01.05

# =======================
import os
import re
import shutil
# =======================
class Proj:
    """Mirrors a category list into directories and copies data into them.

    Reconstructed from a whitespace-mangled paste: the original lost all
    indentation; the structure below follows the code's control flow.
    """

    category = None     # numbered directory names built from info_list
    current_dir = None  # directories present after MakeDirectory()

    @classmethod
    def WorkingDirecMove(cls):
        """Change into the target working directory, reporting the result."""
        try:
            os.chdir(r"C:\Users\user\Desktop\forest")
        except FileNotFoundError as e:
            print(e)
        else:
            print("Current directory : {}".format(os.path.abspath(os.curdir)))

    # directory creation
    @classmethod
    def MakeDirectory(cls):
        """Create one numbered directory per line of info_list; log failures."""
        with open("C:\\Users\\user\\Desktop\\forest_project\\info_list", "r", encoding="utf-8") as f:
            text = f.readlines()
        cls.category = [str(i + 1) + "." + re.sub("\n", "", x) for i, x in enumerate(text)]

        for name in cls.category:
            if "/" in name:
                # '/' is illegal in Windows directory names
                name = name.replace("/", "-")
            try:
                os.mkdir(name)
            except OSError:  # narrowed from a bare except: mkdir raises OSError
                print("디렉토리 생성 실패")
                with open("C:\\Users\\user\\Desktop\\forest_project\\error_log.txt", "a", encoding="utf-8") as f:
                    f.write(name + "\n")
            else:
                print(name + " : 디렉토리 생성 성공")

        cls.current_dir = os.listdir()

    @classmethod
    def DataMove(cls):
        """Copy files from non-empty source dirs whose names match created dirs."""
        os.chdir("C:\\Users\\user\\Desktop\\백업\\dev\\데이터수집\\수집데이터메뉴얼")
        tmp_dir = list()
        for d in os.listdir():
            # keep only non-empty directories
            if os.path.isdir(d) and len(os.listdir(d)):
                tmp_dir.append(d)

        for x in tmp_dir:
            if x in cls.current_dir:
                for y in os.listdir(x):
                    shutil.copy(x + "\\" + y, "C:\\Users\\user\\Desktop\\forest\\{}\\".format(x))
def main():
    """Run the three phases in order: chdir, create dirs, copy data.

    Reconstructed from a whitespace-mangled paste (indentation restored).
    """
    Proj.WorkingDirecMove()
    Proj.MakeDirectory()
    Proj.DataMove()


if __name__ == "__main__":
    main()


'언어 > python' 카테고리의 다른 글

python + crawling + elasticsearch  (0) 2019.02.04
프로젝트 코드 일부분  (0) 2019.01.20
project 일부 코드  (0) 2019.01.08
Project 일부분  (0) 2019.01.05
project_openapi_ch  (0) 2018.12.31

from selenium import webdriver

from t01 import *

from bs4 import BeautifulSoup

import requests

from urllib.parse import urlencode

import time

import json

import pprint as ppr

import re

"""

국보/ 보물/ 사적/ 명승/ 등록문화재 

"""


class Heriage:
    """Collects National Treasure listings from heritage.go.kr.

    UrlRequests() scrapes the region-list page; SubUrlRequests() visits
    each item's detail page to fill in its location.
    (Class name spelling kept for compatibility with existing callers.)
    """

    def __init__(self):
        self.url = "http://heritage.go.kr"
        self.path = "/heri/cul/culSelectRegionList.do"
        # query parameters for the region-list endpoint
        self.params = {
            "s_kdcd": None,    # 11: National Treasure / 12: Treasure / 13: Historic Site / 15: Scenic Site / 79: Registered
            "s_ctcd": None,    # city code  => e.g. Seoul: 11
            "ccbaLcto": None,  # district code => e.g. Jongno-gu: 11
            "culPageNo": None, # current page
        }
        self.total_data = []   # accumulated result dicts
        self.html = None
        self.bs_object = None

    # Instance method (1)
    def UrlRequests(self):
        """Fetch page 1 of Seoul/Jongno National Treasures and collect rows."""
        self.params["s_kdcd"] = 11    # National Treasure
        self.params["s_ctcd"] = 11    # city
        self.params["ccbaLcto"] = 11  # district
        self.params["culPageNo"] = 1  # current page
        target_url = self.url + self.path + "?" + urlencode(self.params)
        self.html = requests.get(target_url)
        if self.html.status_code != 200:
            return
        self.bs_object = BeautifulSoup(self.html.text, "html.parser")
        rows = self.bs_object.select("table.tbl.type_1.c_result > tbody > tr > td:nth-of-type(3)")
        for cell in rows:
            name = re.sub(pattern="[\r,\n, ]", repl="", string=str(cell.text))
            href = cell.select_one("a").attrs["href"]
            # href looks like javascript:fn('a','b','c','d'); pull the args
            lo, hi = str(href).find("("), str(href).find(")")
            args = (str(href)[lo + 1:hi]).replace("'", "").split(sep=",")
            print(name, args)
            detail_params = {
                "s_kdcd": self.params["s_kdcd"],
                "s_ctcd": self.params["s_ctcd"],
                "ccbaKdcd": args[0],
                "ccbaAsno": args[1],
                "ccbaCtcd": args[2],
                "ccbaCpno": args[3],
                "culPageNo": self.params["culPageNo"],
            }
            # record: type, city, district, name, location (filled below)
            record = {
                "s_kdcd": self.params["s_kdcd"],
                "s_ctcd": self.params["s_ctcd"],
                "ccbaLcto": self.params["ccbaLcto"],
                "name": name,
                "location": None,
            }
            self.total_data.append(self.SubUrlRequests(detail_params, record))

    @staticmethod
    def SubUrlRequests(params, data):
        """Fill data['location'] from the item's detail page; None on HTTP error."""
        target_url = "http://heritage.go.kr/heri/cul/culSelectDetail.do?" + urlencode(params)
        html = requests.get(target_url)
        if html.status_code != 200:
            return None
        bs_obj = BeautifulSoup(html.text, "html.parser")
        txt = bs_obj.select_one("div.hschDetail_info > table.hschDi_info > tbody > tr:nth-of-type(4) > td")
        data["location"] = str(txt.text).strip()
        return data

def main():
    """Build the collector, run the crawl, and dump what was gathered."""
    heritage = Heriage()
    heritage.UrlRequests()
    print(heritage.total_data)


if __name__ == "__main__":
    main()



'언어 > python' 카테고리의 다른 글

프로젝트 코드 일부분  (0) 2019.01.20
프로젝트 디렉토리  (0) 2019.01.13
Project 일부분  (0) 2019.01.05
project_openapi_ch  (0) 2018.12.31
test_code  (0) 2018.12.30

Project 일부분

언어/python — 2019. 1. 5. 16:58

'''

@ junhyeon.kim

@ email - sleep4725@naver.com

@ 공원 정보

'''

# ==================================

import requests

from yaml import load, load_all, YAMLError

import sys

from urllib.parse import urlencode

import pprint

import re

import json

# ==================================

class PROJ:
    """Collects PM10 air-quality rows from the data.go.kr open API into du.json."""

    # constructor
    def __init__(self):
        self.url_target = None  # API host, read from the YAML config
        self.url_path = None    # API path, read from the YAML config
        # NOTE(review): the reference config below embeds a live serviceKey —
        # a credential committed to source; rotate/remove it.
        '''
            << yaml file info >>
            params:
            path: /openapi/services/rest/ArpltnInforInqireSvc/getCtprvnRltmMesureDnsty
            serviceKey: McNJ0S9TlZmlJyQpHpMfZ2JU7kv0AlFlcsWgBPZxrPO1ddMd2N4Od61wsdpN+b/Op2LARnanjLLDF9do2+N45w==
            pageNo: 1
            numOfRows: 10
            itemCode: PM10
            dataGubun: HOUR
            searchCondition: MONTH
        '''
        # query parameters sent to the API
        self.params = {
            "serviceKey": None, "pageNo": None, "numOfRows": None, "itemCode": None,
            "dataGubun": None, "searchCondition": None, "_returnType": None,
        }
        self.yaml_doc = None    # parsed YAML config
        self.resp_data = None   # decoded JSON API response
        self.json_data = []     # flattened rows to be written out
        self.f = None
        self.jsonFileCreate()   # ensure ./OUTPUT/DU_JSON/du.json exists
        # API field name -> Korean region label
        self.localinfo = {
            "busan": "부산", "chungbuk": "충북", "chungnam": "충남", "daegu": "대구",
            "daejeon": "대전", "gangwon": "강원", "gwangju": "광주", "gyeongbuk": "경북",
            "gyeonggi": "경기", "gyeongnam": "경남", "incheon": "인천", "jeju": "제주",
            "jeonbuk": "전북", "jeonnam": "전남", "sejong": "세종", "seoul": "서울",
            "ulsan": "울산",
        }

    # instance method - 0: create the output JSON file if it is missing
    def jsonFileCreate(self):
        try:
            self.f = open("./OUTPUT/DU_JSON/du.json", "x")
        except FileExistsError as e:
            print(e)
        else:
            self.f.close()

    # instance method - 1: read the YAML config and populate the params
    def paramsSett(self):
        try:
            with open("./CONF/DU/data_go_kr_dust", "r", encoding="utf-8") as f:
                self.yaml_doc = load(f)
        except FileNotFoundError as e:  # config file missing
            print(e)
            sys.exit(1)                 # stop the program
        except YAMLError as e:          # malformed YAML
            print(e)
            sys.exit(1)                 # stop the program
        else:
            print(self.yaml_doc)
            # params setting =========================================
            doc = self.yaml_doc
            self.url_target = doc["url"]
            self.url_path = doc["params"]["path"]
            for key in ("serviceKey", "pageNo", "numOfRows", "itemCode",
                        "dataGubun", "searchCondition", "_returnType"):
                self.params[key] = doc["params"][key]
            # ========================================================

    # instance method - 2: call the API and keep the JSON payload
    def urlRequests(self):
        query = urlencode(self.params)
        url = self.url_target + self.url_path + "?" + query
        html = requests.get(url)
        if html.status_code == 200:
            self.resp_data = html.json()

    # instance method - 3: flatten the response rows and append to du.json
    def reponseDataParcing(self):
        # each row holds one reading per region field plus a dataTime stamp
        data = self.resp_data["list"]
        for row in data:
            record = dict(row)
            # drop request-echo fields so only region readings remain
            for remv in ['_returnType', 'dataGubun', 'dataTerm', 'numOfRows', 'totalCount', 'pageNo',
                         'resultCode', 'resultMsg', 'searchCondition', 'serviceKey', 'itemCode']:
                try:
                    record.pop(remv)
                except KeyError as e:
                    print(e)
            for key in list(record.keys()):
                if key != "dataTime":
                    self.json_data.append({
                        "si-name": self.localinfo[key],
                        "pm": record[key],
                        "date-time": record["dataTime"],
                    })
        # write the accumulated rows to the JSON file
        with open("./OUTPUT/DU_JSON/du.json", "a", encoding="utf-8") as make_json:
            json.dump(self.json_data, make_json, ensure_ascii=False, indent="\t")


def main():
    """Config -> request -> parse/write, in order."""
    node = PROJ()
    node.paramsSett()
    node.urlRequests()
    node.reponseDataParcing()


if __name__ == "__main__":
    main()

'언어 > python' 카테고리의 다른 글

프로젝트 디렉토리  (0) 2019.01.13
project 일부 코드  (0) 2019.01.08
project_openapi_ch  (0) 2018.12.31
test_code  (0) 2018.12.30
python => from : crawling to : json  (0) 2018.12.23

project_openapi_ch

언어/python — 2018. 12. 31. 21:02

# 문화재청 위치 정보

# JunHyeon.Kim

"""

Response

01. ccmaName     : 문화재유형

02. crltsnoNm    : 지정호수

03. ccbaMnm1     : 문화재명

04. ccbaMnm2     : 문화재명

05. ccbaCtcdNm

06. ccsiName

07. ccbaAdmin

08. ccbaKdcd

09. ccbaCtcd

10. ccbaAsno

11. ccbaCncl

12. ccbaCpno

"""

# ------------------------------------

import requests as req

from urllib.parse import urlencode

import xml.etree.ElementTree as ET

import json

import os

import sys

# ------------------------------------

class INFO:
    """Static lookup tables for Cultural Heritage Administration codes."""

    def __init__(self):
        """Build the designation-type and region code tables.

        Other known type codes (not included here): "18" 국가민속문화재,
        "31" 문화재자료, "80" 이북5도 무형문화재, "17" 국가무형문화재,
        "24" 시도민속문화재, "22" 시도무형문화재, "21" 시도유형문화재,
        "23" 시도기념물, "16" 천연기념물.
        """
        # designation-type code -> label
        self.ccbaKdcd = {
            "11": "국보",
            "12": "보물",
            "13": "사적",
            "14": "사적및명승",
            "15": "명승",
            "16": "천연기념물",
            "79": "등록문화재",
        }
        # city/province code -> label
        self.ccbaCtcd = {
            "11": "서울", "21": "부산",
            "22": "대구", "23": "인천",
            "24": "광주", "25": "대전",
            "26": "울산", "45": "세종",
            "31": "경기", "32": "강원",
            "33": "충북", "34": "충남",
            "35": "전북", "36": "전남",
            "37": "경북", "38": "경남",
            "50": "제주",
        }


class CH_Location_Search:
    """Fetches location XML for one heritage item (by name) from gis-heritage.go.kr."""

    def __init__(self):
        self.url = "http://www.gis-heritage.go.kr/openapi/xmlService/spca.do"
        self.params = {"ccbaMnm1": None}  # heritage name, set by the caller

    def urlRequests(self):
        """Request the location XML and save it under ../CONF_CURTURE/.

        If the first save fails with EINVAL (invalid character in a
        Windows file name), the name is sanitized and the save retried
        once; any other failure exits the process, as before.

        BUGFIX: the original called ``str.replace(old=..., new=...)`` —
        str.replace accepts no keyword arguments, so every retry raised
        TypeError — and tested ``OSError.errno == 22`` on the OSError
        class (a descriptor, never equal to 22) instead of the caught
        exception instance.
        """
        t_param = urlencode(self.params)
        t_url = self.url + "?" + t_param
        t_html = req.get(t_url)
        if t_html.status_code != 200:
            return
        try:
            with open(file="../CONF_CURTURE/ch_{}_localtion.xml".format(self.params["ccbaMnm1"]),
                      mode='w', encoding='utf-8') as f:
                f.write(t_html.text)
        except OSError as e:
            if e.errno == 22:  # EINVAL: illegal character in the file name
                name = str(self.params["ccbaMnm1"])
                for ch in ("<", ">", ","):
                    name = name.replace(ch, " ")
                self.params["ccbaMnm1"] = name
            try:
                with open(file="../CONF_CURTURE/ch_{}_localtion.xml".format(self.params["ccbaMnm1"]),
                          mode='w', encoding='utf-8') as f:
                    f.write(t_html.text)
            except OSError:
                sys.exit(1)

class CH:

    """Bulk-downloads heritage list XML per (region, type) pair, then
    resolves each item's location file via CH_Location_Search."""

    node = INFO()                          # composed object: code lookup tables

    chlocation_node = CH_Location_Search() # composed object: per-item location fetcher

    # ====================================================

    url = "http://www.cha.go.kr/cha/SearchKindOpenapiList.do"

    params = {"ccbaCtcd":None, "ccbaKdcd":None}  # city/province code :=> 11 (Seoul)

    response_data = None


    @classmethod

    def urlRequests(cls):

        """For every (region x designation-type) pair, fetch the list XML
        and write it to ../CONF/ch_<region>_<type>.xml."""

        # params encoding

        for k1, v1 in cls.node.ccbaCtcd.items(): # ___ <set the city/province code>

            cls.params["ccbaCtcd"] = k1

            for k2, v2 in cls.node.ccbaKdcd.items(): # ___ <set the designation-type code>

                cls.params["ccbaKdcd"] = k2

                local_params = urlencode(cls.params)

                local_url    = cls.url + "?" + local_params

                html = req.get(local_url)

                if html.status_code == 200:

                    # print (html.text)

                    with open(file="../CONF/ch_{0}_{1}.xml".format(v1, v2), mode='w', encoding='utf-8') as f:

                        f.write(html.text)

                        f.close()  # redundant inside `with`; kept as-is

            # NOTE(review): v2 here is whatever the inner loop left last,
            # so the message always reports the final type label.
            print ("{0} {1} 작업 끝".format(v1,v2))


    @classmethod

    def xmlParsingToJson(cls):

        """Walk ../CONF/*.xml and, for every ccbaMnm1 (heritage name)
        element found, download that item's location XML via
        chlocation_node."""

        # move into the download directory

        os.chdir("../CONF")

        for f in os.listdir():

            fname, fext = os.path.splitext(f)

            if fext == ".xml":

                tree = ET.parse(f)

                root = tree.getroot()

                for elem in root:

                    for subelem in elem:

                        # if subelem.tag == "ccmaName":   # heritage type

                        #     print (subelem.tag + ":" + subelem.text)

                        if subelem.tag == "ccbaMnm1": # heritage name

                            print(subelem.tag + ":" + subelem.text)

                            print ("================================")

                            # set the request parameter

                            cls.chlocation_node.params['ccbaMnm1'] = subelem.text

                            # url requests

                            cls.chlocation_node.urlRequests()

        '''

        json file structure (planned)

        "지역명": "경기" 

        '''

# Module-level driver: download every region/type list XML, then fetch
# each item's location file. Runs on import.
CH.urlRequests()

CH.xmlParsingToJson()



'언어 > python' 카테고리의 다른 글

project 일부 코드  (0) 2019.01.08
Project 일부분  (0) 2019.01.05
test_code  (0) 2018.12.30
python => from : crawling to : json  (0) 2018.12.23
python json 파일 변환 - 기상청 데이터  (0) 2018.12.22

test_code

언어/python — 2018. 12. 30. 12:46
from selenium import webdriver
import requests
from bs4 import BeautifulSoup
import time
import tkinter # GUI PROGRAM
from openpyxl import Workbook
from openpyxl.styles import Color, PatternFill, Font
import pprint as ppr
import os
import sys
import win32api
import subprocess
#-------------------------------
class DataGoKr:
def __init__(self, s):
self.url = "https://www.data.go.kr/"
self.chromeDriver = webdriver.Chrome("C:\\Users\\junhyeon.kim\\Documents\\chrome_driver\\chromedriver.exe")
self.html = None
self.searchData = s
self.workBook = Workbook() # work-book ( excel )
self.workSheet = None # work-sheet ( excel )
self.dataTotal = dict()
self.sigungu = ["부산광역시", "서울특별시", "울산광역시", "대전광역시", "강원도", "인천광역시", "경상북도",
"광주광역시", "충청북도", "경기도", "전라남도", "대구광역시", "제주특별자치도", "충청남도",
"경상남도", "전라북도"]

# Instance method (1)
def DirectoryListing(self):
if self.searchData in [ os.path.splitext(f)[0] for f in os.listdir()]:
win32api.MessageBox(0, "exist file", "<디렉토리>")
exit(1)
else:
win32api.MessageBox(0, "확인을 누르시면 엑셀 파일을 생성하겠습니다.", "<디렉토리>")

# Instance method (2)
def UrlRequests(self):
self.chromeDriver.get(self.url)
assert "공공데이터포털" in self.chromeDriver.title
time.sleep(3) # - 3
self.chromeDriver.fullscreen_window()
# 검색어 입력 : 공공데이터
self.chromeDriver.find_element_by_name("query").send_keys(self.searchData)
# 버튼 입력
self.chromeDriver.find_element_by_xpath('//*[@id="home-search-form"]/button/i').click()
time.sleep(2) # - 2
# openapi click
self.chromeDriver.find_element_by_xpath('//*[@id="openapiTab"]/a/span').click()
# function call
self.UrlParcing()

# Instance method (3)
def UrlParcing(self):
time.sleep(2) # - 2
self.html = BeautifulSoup(self.chromeDriver.page_source, "html.parser")
list_data_item = self.html.find_all("div", {"class":"data-item"})
page_ = 1

for i in list_data_item:
tmp = {
"meta":None, "desc":None, "type":None, "page":None
}
title_ = str(i.select_one("div.data-title > a").text).strip()
# ===============================================================================
meta_sub_dict = {}
# 수정일
t = str(i.select_one("div.data-meta > span:nth-of-type(1)").text).split(sep=":")
f = t[0].rstrip()
r = t[1].lstrip()
meta_sub_dict[f] = r

# 기관
t = str(i.select_one("div.data-meta > span:nth-of-type(2)").text).split(sep=":")
f = t[0].rstrip()
r = t[1].lstrip()
meta_sub_dict[f] = r

# 서비스 유형
t = str(i.select_one("div.data-meta > span:nth-of-type(3)").text).split(sep=":")
f = t[0].rstrip()
r = t[1].lstrip()
meta_sub_dict[f] = r
# ===============================================================================
desc_ = str(i.select_one("div.data-desc").text).strip()
type_ = str(i.select_one("div.data-types > span.data-type.XML").string)
# --------------------------
tmp['meta'] = meta_sub_dict
tmp['desc'] = desc_
tmp['type'] = type_
tmp['page'] = page_
# --------------------------
self.dataTotal[title_] = tmp
ppr.pprint (self.dataTotal)
page_ += 1

# Instance method (4)
def XlWrite(self):
INDEX = ["B", "C", "D", "E", "F", "G", "H"]
"""
B => 타이틀
C => 수정일
D => 기관
E => 서비스 유형
F => 속성
G => 데이터 타입
H => 페이지
"""
num_index = 2
self.workSheet = self.workBook.active
# 열 채우기 색 ____________________________________________________________________________
gray = PatternFill(start_color="F2DCDB", end_color="F2DCDB", fill_type="solid")
# _______________________________________________________________________________________

# 열 너비 조정 _______________________________ _____________________________________________
self.workSheet.column_dimensions["A"].width = 0.47
self.workSheet.column_dimensions["B"].width = 48.5 # 타이틀
self.workSheet.column_dimensions["C"].width = 9.5 # 수정일
self.workSheet.column_dimensions["D"].width = 28.9 # 기관
self.workSheet.column_dimensions["E"].width = 10.4 # 서비스 유형
self.workSheet.column_dimensions["F"].width = 64.3 # 속성
self.workSheet.column_dimensions["G"].width = 10.4 # 데이터 타입
self.workSheet.column_dimensions["H"].width = 8.1 # 페이지

for indx in ["B2", "C2", "D2", "E2", "F2", "G2", "H2"]:
self.workSheet[indx].fill = gray
# _______________________________________________________________________________________

# 인덱스 생성 _____________________________________________________________________________
for i in INDEX:
self.workSheet[INDEX[0] + str(num_index)] = "타이틀"
self.workSheet[INDEX[1] + str(num_index)] = "수정일"
self.workSheet[INDEX[2] + str(num_index)] = "기관"
self.workSheet[INDEX[3] + str(num_index)] = "서비스 유형"
self.workSheet[INDEX[4] + str(num_index)] = "속성"
self.workSheet[INDEX[5] + str(num_index)] = "데이터 타입"
self.workSheet[INDEX[6] + str(num_index)] = "페이지"
# _______________________________________________________________________________________
num_index += 1

# 데이터 값 적재
for k, v in self.dataTotal.items():
self.workSheet[INDEX[0] + str(num_index)] = k
self.workSheet[INDEX[1] + str(num_index)] = v["meta"]["수정일"]
self.workSheet[INDEX[2] + str(num_index)] = v["meta"]["기관"]
self.workSheet[INDEX[3] + str(num_index)] = v["meta"]["서비스유형"]
self.workSheet[INDEX[4] + str(num_index)] = v["desc"]
self.workSheet[INDEX[5] + str(num_index)] = v["type"]
self.workSheet[INDEX[6] + str(num_index)] = v["page"]
num_index += 1
self.workBook.save(self.searchData + ".xlsx")

# Open the generated file
def XlFileOpen(self):
    """Open <searchData>.xlsx with its associated application via the shell."""
    # NOTE: shell=True hands the path to the Windows shell so the .xlsx
    # association resolves; the path is program-generated, not user input.
    workbook_path = self.searchData + ".xlsx"
    subprocess.call(workbook_path, shell=True)

# Finalizer
def __del__(self):
    """Release the workbook handle when this object is garbage-collected."""
    self.workBook.close()

def main():
    """Run the full pipeline: list directories, fetch data, write and open the workbook."""
    node = DataGoKr("공공데이터")  # create the search client
    node.DirectoryListing()
    node.UrlRequests()
    node.XlWrite()
    node.XlFileOpen()

if __name__ == "__main__":
    # Work out of a fixed scratch directory; create it first if it is missing.
    # Bare `except:` replaced with `except OSError` — the original also
    # swallowed KeyboardInterrupt/SystemExit, hiding real failures.
    try:
        os.chdir("C:\\Users\\junhyeon.kim\\Desktop\\doc")
    except OSError:
        try:
            os.mkdir("C:\\Users\\junhyeon.kim\\Desktop\\doc")
        except OSError:
            # Could neither enter nor create the work directory: give up.
            sys.exit(1)
        else:
            os.chdir("C:\\Users\\junhyeon.kim\\Desktop\\doc")
            win32api.MessageBox(0, "{}".format(os.path.abspath(os.getcwd())), "<현재 디렉토리>")
    else:
        win32api.MessageBox(0, "{}".format(os.path.abspath(os.getcwd())), "<현재 디렉토리>")
    main()


'언어 > python' 카테고리의 다른 글

Project 일부분  (0) 2019.01.05
project_openapi_ch  (0) 2018.12.31
python => from : crawling to : json  (0) 2018.12.23
python json 파일 변환 - 기상청 데이터  (0) 2018.12.22
OPENAPI  (0) 2018.12.21

from bs4 import BeautifulSoup

from selenium import webdriver

import pymysql

from urllib.parse import urlencode

import time

import pprint

import sys

import json

# =====================================

class Req:
    """Crawl the Korean-Wikipedia cultural-heritage index with Selenium.

    Visits each of the 17 regional list pages linked from
    "위키백과:대한민국의_문화재_목록", scrapes the heritage tables per category,
    and stores the result as ``{region: {category_id: [item names]}}``.
    """

    def __init__(self):
        # Path to the local chromedriver binary (Windows-specific).
        self.driver_path = "C:\\Users\\sleep\\Desktop\\chrom_driver\\chromedriver.exe"
        self.url = "https://ko.wikipedia.org/wiki/위키백과:대한민국의_문화재_목록"
        # Chrome driver ---
        self.chrome_driver = None
        self.xpath_list = None
        self.xpathSetting()  # build the 17 region-link XPaths up front
        self.bs_object = None
        # Accumulated results: {region title: {headline id: [titles]}}.
        self.total_data = {}  # dictionary

    # Instance Method (1)
    def xpathSetting(self):
        """Build the XPaths of the 17 region links on the index page."""
        ''' //*[@id="mw-content-text"]/div/ul/li[{}]/a '''
        self.xpath_list = ['//*[@id="mw-content-text"]/div/ul/li[{}]/a'.format(i) for i in range(1, 18)]
        print (self.xpath_list)

    # Instance Method (2)
    def urlRequests(self):
        """Click through every region page, scrape its tables, then go back."""
        self.chrome_driver = webdriver.Chrome(self.driver_path)
        self.chrome_driver.get(self.url)
        assert "위키백과:대한민국의 문화재 목록 - 위키백과, 우리 모두의 백과사전" in self.chrome_driver.title

        for x in self.xpath_list:
            # NOTE(review): find_element_by_xpath was removed in Selenium 4;
            # this code targets Selenium 3.
            self.chrome_driver.find_element_by_xpath(x).click()
            time.sleep(2)  # let the region page finish loading

            # BeautifulSoup ------------------------------------------
            self.bs_object = BeautifulSoup(self.chrome_driver.page_source, "html.parser")
            self.chrome_driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            # The page heading looks like "위키백과:서울특별시의 문화재 목록".
            # Strip the namespace prefix, keep the first word (region name),
            # and drop its trailing possessive character ("의").
            title = self.bs_object.select_one("h1#firstHeading").string
            title = title.split(sep=":")[1]
            title = title.split(sep=" ")[0]
            title = title[:len(title)-1]
            print (title)

            self.total_data[title] = {}  # per-region bucket

            # ====================================================
            # Each category heading is a span.mw-headline; its table is the
            # (i+1)-th table inside the article body.
            tmp_list = self.bs_object.select("span.mw-headline")
            for i, t in enumerate([h.attrs['id'] for h in tmp_list]):
                list_data = list()
                data = self.bs_object.select("#mw-content-text > div > table:nth-of-type({}) > tbody > tr > td.fn.org > a".format(i+1))
                for e in data:
                    # Renamed from `x` to avoid shadowing the outer xpath loop
                    # variable.
                    raw_title = str(e.attrs['title'])
                    if raw_title.count(" "):
                        # Drop the leading designation word before the first
                        # space, keep the rest of the name.
                        list_data.append(' '.join(raw_title.split(sep=" ")[1:]))
                    else:
                        list_data.append(raw_title)
                self.total_data[title][t] = list_data

            time.sleep(2)
            # Navigate back to the index page for the next region link ------
            self.chrome_driver.execute_script("window.history.go(-1)")

        pprint.pprint (self.total_data)

    def jsonCreate(self):
        """Dump the collected data to ch_data.json (UTF-8, tab-indented)."""
        # encoding="utf-8" is required: ensure_ascii=False writes raw Korean
        # text, which can raise UnicodeEncodeError under a cp949 locale default.
        with open("ch_data.json", "w", encoding="utf-8") as outfile:
            json.dump(self.total_data, outfile, ensure_ascii=False, indent='\t')

def main():
    """Crawl the heritage lists and dump them to JSON."""
    crawler = Req()  # create the crawler
    crawler.urlRequests()
    crawler.jsonCreate()


if __name__ == "__main__":
    main()









'언어 > python' 카테고리의 다른 글

project_openapi_ch  (0) 2018.12.31
test_code  (0) 2018.12.30
python json 파일 변환 - 기상청 데이터  (0) 2018.12.22
OPENAPI  (0) 2018.12.21
기록 1  (0) 2018.12.15


from openpyxl import load_workbook
import json
class Forcast:
    """Read region/grid coordinates from forcast.xlsx into a nested dict
    and dump the result to forcastInfo.json.

    Resulting shape: {lvl1: {lvl2: {lvl3: {"x": gx, "y": gy}}}}, where the
    lvl2/lvl3 layers are omitted for rows that leave those cells empty.
    """
    def __init__(self):
        self.wb = load_workbook(filename="C:\\Users\\sleep\\Desktop\\forcast.xlsx")
        self.ws = self.wb.active
        # Columns A..E hold: level-1 name, level-2 name, level-3 name,
        # grid X, grid Y. Data starts at row 2 (row 1 is the header).
        self.INDX = {'colum':[1 ,2 ,3 ,4 ,5], "row":2}
        self.korPos = {}

    '''
    row = 1 
    col = 1 ('A')
    '''
    def ReadXl(self):
        """Walk rows until the first empty level-1 cell, merging each row
        into self.korPos without clobbering earlier entries."""
        while True:
            # ==============================================================================
            t_stp1 = self.ws.cell(row=self.INDX["row"], column=self.INDX['colum'][0]).value  # e.g. 서울특별시
            t_stp2 = self.ws.cell(row=self.INDX["row"], column=self.INDX['colum'][1]).value  # e.g. 종로구
            t_stp3 = self.ws.cell(row=self.INDX["row"], column=self.INDX['colum'][2]).value  # e.g. 사직동
            t_stp4 = self.ws.cell(row=self.INDX["row"], column=self.INDX['colum'][3]).value  # grid X
            t_stp5 = self.ws.cell(row=self.INDX["row"], column=self.INDX['colum'][4]).value  # grid Y
            # ==============================================================================
            # case _01_01 (no more data to process)
            if not(t_stp1): break
            # case _01_02 (row has data)
            else:
                # case _02_01: both level-2 and level-3 names are present.
                # The bare `except:` clauses were narrowed to KeyError — the
                # probing dict lookups can raise nothing else.
                if t_stp2 and t_stp3:
                    # case _02_01_01
                    t = {t_stp2:{t_stp3:{"x":t_stp4, "y":t_stp5}}}
                    try:
                        self.korPos[t_stp1]
                    except KeyError:
                        self.korPos[t_stp1] = t
                    else:
                        try:
                            self.korPos[t_stp1][t_stp2]
                        except KeyError:
                            self.korPos[t_stp1][t_stp2] = t[t_stp2]
                        else:
                            self.korPos[t_stp1][t_stp2].update(t[t_stp2])
                else:  # at least one of the sub-level names is missing
                    # Both level-2 and level-3 missing: coordinates attach
                    # directly to the level-1 entry.
                    if not (t_stp2) and not (t_stp3):
                        t = {"x": t_stp4, "y": t_stp5}
                        try:
                            self.korPos[t_stp1]
                        except KeyError:  # level-1 entry does not exist yet
                            self.korPos[t_stp1] = t
                        else:  # entry exists: only merge if not a duplicate
                            # case
                            if t not in self.korPos[t_stp1].values():
                                self.korPos[t_stp1].update(t)
                    elif t_stp2 and not (t_stp3):
                        t = {t_stp2:{"x": t_stp4, "y": t_stp5}}
                        try:
                            self.korPos[t_stp1]
                        except KeyError:  # level-1 entry does not exist yet
                            self.korPos[t_stp1] = t
                        else:  # entry exists: only merge if not a duplicate
                            # case
                            if t not in self.korPos[t_stp1].values():
                                self.korPos[t_stp1].update(t)
                    # NOTE(review): a row with level-3 but no level-2 is
                    # silently ignored, as in the original.

            self.INDX["row"] += 1

    def jsonCreate(self):
        """Write self.korPos to forcastInfo.json (UTF-8, tab-indented)."""
        # encoding="utf-8" added: ensure_ascii=False emits raw Korean text,
        # which the cp949 locale default on Windows may fail to encode.
        with open("forcastInfo.json", "w", encoding="utf-8") as outfile:
            json.dump(self.korPos, outfile, ensure_ascii=False, indent='\t')

def main():
    """Parse the workbook and emit forcastInfo.json."""
    node = Forcast()  # create the parser
    node.ReadXl()
    node.jsonCreate()


if __name__ == "__main__":
    main()





'언어 > python' 카테고리의 다른 글

test_code  (0) 2018.12.30
python => from : crawling to : json  (0) 2018.12.23
OPENAPI  (0) 2018.12.21
기록 1  (0) 2018.12.15
실패 -  (0) 2018.11.28

OPENAPI

언어/python2018. 12. 21. 17:46

import requests

from urllib.parse import urlencode

# KMA (기상청) short-term forecast endpoint; the service key was partially
# redacted in the original post.
url = "http://newsky2.kma.go.kr/service/SecndSrtpdFrcstInfoService2/ForecastSpaceData?"
url += "serviceKey=OiP0xNoox9eLy - 일부 삭제 - jcV8NQ%3D%3D&"

# Query: forecast for grid point (nx, ny) at the given base date/time,
# first page of 10 rows, JSON response format.
params = {
    "base_date": "20181221",
    "base_time": "0500",
    "nx": 60,
    "ny": 127,
    "numOfRows": 10,
    "pageNo": 1,
    "_type": "json",
}

url = url + urlencode(params)
response = requests.get(url)
print(response.json())

'언어 > python' 카테고리의 다른 글

python => from : crawling to : json  (0) 2018.12.23
python json 파일 변환 - 기상청 데이터  (0) 2018.12.22
기록 1  (0) 2018.12.15
실패 -  (0) 2018.11.28
selenium + pandas + 연습 중  (0) 2018.11.15