# selenium + pandas practice (work in progress)
# ===========================================================
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import pprint as ppr
import numpy as np
import re
# ===========================================================
# Scrape the year -> population table rows from the Korean Wikipedia article
# "대한민국의_인구" using headless Chrome, then print them as a pandas Series.
options = Options()  # Chrome launch options
options.headless = True  # run Chrome without opening a visible window
driver = webdriver.Chrome(executable_path=r"C:\Users\sleep\Desktop\chrom_driver\chromedriver.exe",
                          chrome_options=options)
try:
    driver.get("https://ko.wikipedia.org/wiki/{0}".format("대한민국의_인구"))
    # Sanity check that the expected article was loaded.
    assert "대한민국의 인구 - 위키백과, 우리 모두의 백과사전" in driver.title
    print(driver.title)
    html = driver.page_source
finally:
    # Always shut the browser down, even when navigation or the title check
    # fails; otherwise each run leaves a Chrome/chromedriver process behind.
    driver.quit()

bsObject = BeautifulSoup(html, "html.parser")
# Body rows of every sortable wikitable on the page.
# NOTE(review): the "jquery-tablesorter" class is presumably added by the
# page's JavaScript, which would be why a real browser is used -- confirm.
f = bsObject.select('table.wikitable.sortable.jquery-tablesorter > tbody > tr')

# Compiled once instead of on every loop iteration. The character class
# matches newline, tab, carriage return AND comma and space, so thousands
# separators such as "1,000" are stripped as well.
_CELL_NOISE = re.compile("[\n, \t, \r]")

info = dict()
for i in f:
    year = i.select_one("td:nth-of-type(1)")  # first column: year
    man_count = i.select_one("td:nth-of-type(2)")  # second column: population count
    if year is None or man_count is None:
        # Rows without at least two <td> cells (e.g. header-only rows)
        # carry no data; skip them instead of failing on None.
        continue
    # get_text() returns the cell's full text even when it has nested tags,
    # where `.string` would be None and end up stored as the text "None".
    year = _CELL_NOISE.sub("", year.get_text())
    man_count = _CELL_NOISE.sub("", man_count.get_text())
    # A later row with the same first-column text overwrites the earlier one.
    info[year] = man_count

s = pd.Series(list(info.values()), index=list(info.keys()))
print(s)