data crawling

언어/python2019. 3. 3. 14:09

from bs4 import BeautifulSoup

from openpyxl import Workbook

from json import load

import requests

import time


from facebook.TargetString import TargetStr


class Facebook:
    """Fetch an HTML page (URL assembled from ./info.json), then decode a
    target byte string with every codec name found in the page's
    standard-encodings table, printing each result.

    Results are meant to land in an Excel workbook, which is saved as
    "facebook.xlsx" when the instance is garbage-collected.
    """

    def __init__(self):
        # Build the request URL from ./info.json, then fetch and parse
        # the page up front; both helpers exit(1) on failure.
        self.requestUrl = Facebook.jsnoFileRead()
        self.bsObject = self.urlRequests()
        # NOTE(review): .decode(...) is called on this in urlParcing(),
        # so it is presumably a bytes object — confirm in
        # facebook.TargetString.
        self.targetString = TargetStr.target_string
        self.wrkBook = Workbook()  # Excel workbook for the results

    @classmethod
    def jsnoFileRead(cls):
        """Read ./info.json and return "{url}/{path}?{param}".

        The file must contain "url", "path" and "param" keys; missing
        keys format as the string "None". Exits the process with status
        1 when the file does not exist.

        (The method name keeps its original misspelling of "json" so
        existing callers are unaffected.)
        """
        try:
            # `with` guarantees the handle is closed on every path.
            # The original called f.close() inside this except branch,
            # which raised NameError: when open() fails, `f` is never
            # bound.
            with open("./info.json", "r") as f:
                json_doc = load(f)  # json.load already returns a dict
        except FileNotFoundError as e:
            print(e)
            exit(1)
        return "{url}/{path}?{param}".format(
                url=json_doc.get("url"),
                path=json_doc.get("path"),
                param=json_doc.get("param"))

    def urlRequests(self):
        """GET self.requestUrl and return a parsed BeautifulSoup tree.

        Exits the process with status 1 on any non-200 response.
        """
        html = requests.get(self.requestUrl)
        if html.status_code == 200:
            return BeautifulSoup(html.text, "html.parser")
        exit(1)

    def urlParcing(self):
        """Decode the target string with each codec listed in the page.

        Scrapes codec names from the first <td> of each row of the
        standard-encodings table, decodes self.targetString with each
        (errors="ignore"), and prints every result one second apart.
        """
        # Work sheet for the (currently commented-out) cell writes.
        wrkSheet = self.wrkBook.create_sheet("decoding_list")

        table = self.bsObject.select("div#module-codecs > "
                                     "div#standard-encodings > "
                                     "table.docutils > "
                                     "tbody > "
                                     "tr")

        # The first <td> of each row holds the codec name. Guard rows
        # that have no <td> (e.g. header rows using <th>): the original
        # crashed with AttributeError on select_one() returning None.
        encoding_list = [cell.string
                         for cell in (row.select_one("td") for row in table)
                         if cell is not None]
        for n, i in enumerate(encoding_list):
            result_text = self.targetString.decode(i, "ignore")
            # wrkSheet.cell(row=n+2, column=2).value = i
            # wrkSheet.cell(row=n+2, column=3).value = str(result_text)
            time.sleep(1)  # throttle output; keeps the console readable
            print(i, result_text)

    def __del__(self):
        # NOTE(review): saving in __del__ is fragile — at interpreter
        # shutdown, module globals may already be torn down. An explicit
        # save()/close() method would be safer; kept for compatibility.
        self.wrkBook.save("facebook.xlsx")


def main():
    """Entry point: build the crawler and run the decoding pass.

    The redundant fnode.jsnoFileRead() call was removed: __init__
    already reads info.json, and the return value was discarded here,
    so the config file was being read twice for no effect.
    """
    fnode = Facebook()
    fnode.urlParcing()


if __name__ == "__main__":
    main()



'언어 > python' 카테고리의 다른 글

selenium_  (0) 2019.03.11
2019년 3월 9일 ( 주말 프로젝트 )  (0) 2019.03.09
python + 지하철 + 이미지  (0) 2019.02.24
pysimplegui  (0) 2019.02.10
python + crawling + elasticsearch  (0) 2019.02.04