언어/python

selenium + pandas + 연습 중

파아랑새 2018. 11. 15. 23:43

# ===========================================================

from selenium import webdriver

from selenium.webdriver.chrome.options import Options

from bs4 import BeautifulSoup

import pandas as pd

import pprint as ppr

import numpy as np

import re

# ===========================================================

# Configure Chrome to run without a visible window.
options = Options()
# Passing the flag directly has the same effect as ``options.headless = True``
# and does not rely on the Options class exposing a ``headless`` property.
options.add_argument("--headless")

# ``options=`` is the supported keyword; ``chrome_options=`` is a deprecated alias.
# NOTE(review): newer Selenium releases expect the driver path via a Service
# object instead of ``executable_path`` -- confirm against the installed version.
driver = webdriver.Chrome(
    executable_path=r"C:\Users\sleep\Desktop\chrom_driver\chromedriver.exe",
    options=options,
)

try:
    driver.get("https://ko.wikipedia.org/wiki/{0}".format("대한민국의_인구"))

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    expected_title = "대한민국의 인구 - 위키백과, 우리 모두의 백과사전"
    if expected_title not in driver.title:
        raise RuntimeError(
            "unexpected page title: {0!r}".format(driver.title)
        )

    print(driver.title)

    # Page source as reported by the browser; parsed with BeautifulSoup afterwards.
    html = driver.page_source
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left running.
    driver.quit()

bsObject = BeautifulSoup(html, "html.parser")

# Body rows of every table carrying the wikitable/sortable/jquery-tablesorter classes.
f = bsObject.select('table.wikitable.sortable.jquery-tablesorter > tbody > tr')

# The character class lists newline, comma, space, tab and carriage return, so
# thousands separators are removed together with whitespace. Compiled once
# instead of on every row.
_strip_chars = re.compile("[\n, \t, \r]")

# Maps the cleaned text of each row's first cell to the cleaned text of its second cell.
info = {}

for row in f:
    year_cell = row.select_one("td:nth-of-type(1)")  # year
    count_cell = row.select_one("td:nth-of-type(2)")  # count column (presumably population)

    # Rows without two <td> cells (e.g. header-only rows) have nothing to record.
    if year_cell is None or count_cell is None:
        continue

    # get_text() also handles cells with nested tags; ``.string`` is None for
    # those, which previously stored the literal text "None".
    year = _strip_chars.sub("", year_cell.get_text())
    man_count = _strip_chars.sub("", count_cell.get_text())

    info[year] = man_count


# ppr.pprint (info)



# Build a Series whose index is the dict's keys and whose data is the dict's
# values, in the dict's iteration order, then print it.
labels = list(info)
values = list(info.values())
s = pd.Series(values, index=labels)

print(s)