|
# -*- coding: utf-8 -*-
"""
Created on Sun Oct 15 11:53:25 2018
@author: ghosty
@Program: cwbweb_list.py
@Purpose: download quake data from CWB
web source: https://www.cwb.gov.tw/V7/earthquake/rtd_eq.htm
"""
import requests
from bs4 import BeautifulSoup
#import dateutil
def downloadCWBweb(year, month):
    """Download the CWB earthquake activity list for one month.

    The listing page is an ASP.NET form: the page is requested once to
    read its hidden state fields, which are then posted back together
    with the requested year and month.  The rows of the result table
    (CSS class ``datalist4``) are returned as lists of cell texts.

    Args:
        year: Year to query, as an integer (formatted to width 4).
        month: Month to query, as an integer (sent zero-padded to two
            digits).

    Returns:
        A list with one entry per table row that contains ``<td>``
        cells; each entry is the list of stripped, non-empty cell texts.
        An empty list when the response has no ``datalist4`` table.
        ``None`` when either HTTP request returns a non-OK status.

    Raises:
        requests.exceptions.RequestException: on connection errors or
            when a request exceeds the timeout.
    """
    url = 'https://scweb.cwb.gov.tw/Page.aspx/?ItemId=20&loc=tw&adv=1'
    agent = ('Mozilla/5.0 (Windows NT 6.3; WOW64; rv:34.0) '
             'Gecko/20100101 Firefox/34.0')
    headers = {
        'Content-type': 'application/x-www-form-urlencoded',
        'Accept': ('text/html,application/xhtml+xml,'
                   'application/xml;q=0.9,*/*;q=0.8'),
        'User-Agent': agent,
    }
    # Hidden ASP.NET inputs that must be echoed back with the search post.
    hiddenFields = (
        '__VIEWSTATE',
        '__VIEWSTATEGENERATOR',
        '__VIEWSTATEENCRYPTED',
        '__EVENTVALIDATION',
    )
    timeout = 30  # seconds; without a timeout a stalled server blocks forever

    # First request only fetches the form so its state fields can be read.
    response1 = requests.post(url, timeout=timeout)
    if response1.status_code != requests.codes.ok:
        print("CWB requesr fail")
        return None
    soup = BeautifulSoup(response1.text, "lxml")

    payload = {}
    for name in hiddenFields:
        field = soup.find('input', id=name)
        # A missing input (or one without a value) is sent as an empty
        # string instead of raising TypeError on the subscript.
        payload[name] = field.get('value', '') if field is not None else ''
    # Form controls are posted under their name attribute (with '$').
    payload['ctl03$ddlYear'] = "{:4d}".format(year)
    payload['ctl03$ddlMonth'] = "{:0>2d}".format(month)
    payload['ctl03$btnSearch'] = ''

    response2 = requests.post(url, data=payload, headers=headers,
                              timeout=timeout)
    if response2.status_code != requests.codes.ok:
        print("CWB requesr fail")
        return None
    soup2 = BeautifulSoup(response2.text, "lxml")

    table = soup2.find('table', attrs={'class': 'datalist4'})
    if table is None:
        # No result table in the page: nothing to report for this query.
        return []

    quakeData = []
    for row in table.find_all('tr'):
        cols = [item.text.strip() for item in row.find_all('td')]
        if cols:  # skip rows without <td> cells (e.g. header rows)
            # Get rid of empty values
            quakeData.append([item for item in cols if item])
    return quakeData
# Example run: fetch and print the quake list for August 2018.
quakeData = downloadCWBweb(2018, 8)
# downloadCWBweb returns None when the request fails; print nothing then
# instead of raising TypeError while iterating.
for data in quakeData or []:
    print(data)
|
2018年10月15日 星期一
[Python] 從中央氣象局下載地震活動彙整列表
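中央氣象局網站的「地震活動彙整」列表，檢視網頁內容後可知，真正的資料網頁連結為 https://scweb.cwb.gov.tw/Page.aspx/?ItemId=20&loc=tw&adv=1 。因為是 ASP.NET 網頁，所以需先取出 ASP.NET 的隱藏傳遞參數（__VIEWSTATE、__VIEWSTATEGENERATOR、__VIEWSTATEENCRYPTED、__EVENTVALIDATION），再連同年、月一起以 POST 送出查詢；資料表格之 class 為 datalist4，使用 BeautifulSoup 即可取出表格內容。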
訂閱:
張貼留言 (Atom)
沒有留言:
張貼留言