# Data crawling
from bs4 import BeautifulSoup
from openpyxl import Workbook
from json import load
import requests
import time
from facebook.TargetString import TargetStr
class Facebook:
    """Crawl the Python codecs documentation page and print a target
    string decoded with every standard encoding listed there.

    The request URL is assembled from ./info.json; results were meant to
    be written to an Excel workbook (cell writes were disabled in the
    original, but the workbook is still saved on object destruction).
    """

    def __init__(self):
        # Build the request URL from info.json, fetch + parse the page,
        # and prepare an in-memory workbook for the results.
        self.requestUrl = Facebook.jsnoFileRead()
        self.bsObject = self.urlRequests()
        self.targetString = TargetStr.target_string
        self.wrkBook = Workbook()  # create the Excel workbook

    @classmethod
    def jsnoFileRead(cls):
        """Read ./info.json and return "{url}/{path}?{param}".

        Exits the process with status 1 when the file is missing.
        (Method name kept as-is — "jsno" is a typo for "json" — so
        existing callers keep working.)
        """
        try:
            # "with" guarantees the file is closed; the original code
            # raised NameError on f.close() when open() itself failed,
            # because f was never bound in that case.
            with open("./info.json", "r") as f:
                json_doc = load(f)  # json.load already returns a dict
        except FileNotFoundError as e:
            print(e)
            exit(1)
        return "{url}/{path}?{param}".format(
            url=json_doc.get("url"),
            path=json_doc.get("path"),
            param=json_doc.get("param"))

    def urlRequests(self):
        """GET self.requestUrl and return a parsed BeautifulSoup tree.

        Exits the process with status 1 on any non-200 response.
        """
        html = requests.get(self.requestUrl)
        if html.status_code == 200:
            return BeautifulSoup(html.text, "html.parser")
        exit(1)

    def urlParcing(self):
        """Scrape the standard-encodings table and print the target
        string decoded with each listed codec (decode errors ignored)."""
        # The sheet is currently unused (cell writes were commented out
        # in the original); it is still created so the saved workbook
        # contains the "decoding_list" sheet as before.
        self.wrkBook.create_sheet("decoding_list")
        table = self.bsObject.select("div#module-codecs > "
                                     "div#standard-encodings > "
                                     "table.docutils > "
                                     "tbody > "
                                     "tr")
        encoding_list = [row.select_one("td").string for row in table]
        for encoding in encoding_list:
            # assumes targetString is a bytes object — TODO confirm
            # against facebook.TargetString.
            result_text = self.targetString.decode(encoding, "ignore")
            time.sleep(1)  # original pacing between iterations, kept
            print(encoding, result_text)

    def __del__(self):
        # NOTE(review): saving inside __del__ is fragile (it may run
        # during interpreter shutdown, or not at all); kept to preserve
        # the original behavior.
        self.wrkBook.save("facebook.xlsx")
def main():
    """Entry point: build the crawler and run the scrape."""
    fnode = Facebook()
    # The redundant fnode.jsnoFileRead() call was removed: __init__
    # already reads info.json, and the return value was discarded here.
    fnode.urlParcing()


if __name__ == "__main__":
    main()
'언어 > python' 카테고리의 다른 글
selenium_ (0) | 2019.03.11 |
---|---|
2019년 3월 9일 ( 주말 프로젝트 ) (0) | 2019.03.09 |
python + 지하철 + 이미지 (0) | 2019.02.24 |
pysimplegui (0) | 2019.02.10 |
python + crawling + elasticsearch (0) | 2019.02.04 |