2015년 자료 ( 인구 ) 파이썬 크롤링
언어/python · 2018. 7. 2. 15:42
#______________________________________________
import requests as req
from bs4 import BeautifulSoup
import pandas as pd
from pandas import Series, DataFrame
import pprint as ppr
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
#______________________________________________
class WebCawling():
    """Crawl a Korean Wikipedia city-population table and chart it by region.

    Typical use is ``servRequest()`` followed by ``graph()``.
    """

    def __init__(self):
        """Set the target URL and create empty result containers."""
        self.urlTarget = "https://ko.wikipedia.org/wiki/대한민국의_인구순_도시_목록"
        # Raw HTTP response of the last servRequest() call.
        self.htmlNode = None
        # Parsed HTML tree; stays None unless the response status was 200.
        self.bsObject = None
        # {district name: {"지역": region name, "인구": population as int}}
        self.infor_korea = dict()
        # {region name: summed population}; rebuilt on every graph() call.
        self.barGraph = dict()

    @staticmethod
    def _parse_population(text):
        """Convert a population cell such as ``"9,904,312[1]"`` to an int.

        Anything from the first ``[`` onward (a footnote marker) is dropped,
        then thousands separators and surrounding whitespace are removed.

        Raises:
            ValueError: if the remaining text is not an integer.
        """
        digits = str(text).split('[', 1)[0].replace(',', '').strip()
        return int(digits)

    def servRequest(self):
        """Download the page and fill ``self.infor_korea`` from its table rows.

        For every table row, the 2nd ``td`` is used as the district name, the
        3rd as the region and the 4th as the population. Nothing is parsed
        when the HTTP status is not 200.

        Raises:
            ValueError: if a population cell cannot be parsed as an integer.
        """
        # A timeout keeps the script from hanging forever on a stalled server.
        self.htmlNode = req.get(url=self.urlTarget, timeout=10)
        if self.htmlNode.status_code != 200:
            return

        self.bsObject = BeautifulSoup(self.htmlNode.text, "html.parser")
        rows = self.bsObject.select('#mw-content-text > div > table > tbody > tr')
        for row in rows:
            # Read all three columns from the SAME row. Selecting each column
            # separately and zipping the lists misaligns every following row
            # as soon as one row has fewer cells (header rows, row spans).
            cells = row.find_all('td', recursive=False)
            if len(cells) < 4:
                continue
            # get_text() also works for cells with nested tags, where
            # `.string` would be None.
            local_name = cells[1].get_text().strip()
            region = cells[2].get_text().strip()
            man_count = self._parse_population(cells[3].get_text())
            self.infor_korea[local_name] = {"지역": region, "인구": man_count}

    def _aggregate_by_region(self):
        """Rebuild ``self.barGraph`` as {region: total population} and return it."""
        # Start from scratch so that repeated calls do not double the totals.
        self.barGraph = dict()
        for record in self.infor_korea.values():
            region = record['지역']
            self.barGraph[region] = self.barGraph.get(region, 0) + record['인구']
        return self.barGraph

    def graph(self):
        """Print the per-region population totals and show them as a bar chart."""
        import os

        # Malgun Gothic renders the Korean labels; it only exists on Windows,
        # so fall back to matplotlib's default font when the file is absent
        # instead of failing before anything is drawn.
        font_path = 'C:\\Windows\\Fonts\\malgun.ttf'
        if os.path.exists(font_path):
            font_name = font_manager.FontProperties(fname=font_path).get_name()
            rc('font', family=font_name)

        self._aggregate_by_region()
        ppr.pprint(self.barGraph)
        x_col = list(self.barGraph.keys())
        y_col = list(self.barGraph.values())
        plt.bar(x_col, y_col)
        plt.show()
#_____________________________
def main():
    """Crawl the population table, then display the per-region bar chart."""
    # Create the crawler object.
    crawler = WebCawling()
    crawler.servRequest()
    crawler.graph()


# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
'언어 > python' 카테고리의 다른 글
| selenium (0) | 2018.07.03 |
|---|---|
| 크롤링 (0) | 2018.07.03 |
| 크롤링 + 조잡한 데이터 시각화 (0) | 2018.07.02 |
| python + mysql (0) | 2018.06.29 |
| 크롤링 해시 추출 코드 일부분 (0) | 2018.06.28 |