2018年10月7日 星期日

[Python] 從USGS下載地震資料

為了分析地震地點的地理名稱, 使用Python語言, 從USGS下載地震資料存成文字檔. 因 USGS 使用逗點作為分隔, 因此這裡使用 $ 錢號來做輸出資料區隔, 以避免資料混淆, 亦可匯入 excel 進行整理. 輸出資料有兩個欄位, 第一個是原始地震資料來源, 第二個是擷取的地點名稱, 打算做連續地點關聯性分析, 以及加上自動翻譯為中文, 作為資料與動畫分析時自動匯入使用


# -*- coding: utf-8 -*-
"""
Created on Sun Oct  7 11:53:25 2018
@author: ghosty
@Program: usus_location_list.py
@Prupose: download USGS data and analyze the quake location name for
          (1) location relation analysis
          (2) translation from English to Traditional Chinese, used for AI analysis tool development
"""

import csv
import requests
import os.path
import datetime
#import dateutil
from dateutil.relativedelta import relativedelta

#init global variables
locFileName = 'usgs_loc.txt'
locationList=[]
quakeList=[]
#
  
def downloadUSGS(starttime, endtime, minmagnitude, maxmagnitude):  #,latitude=121.1,longitude=23.5,maxradiuskm=20000):
    url = 'http://earthquake.usgs.gov/fdsnws/event/1/query'

    query = {
            'format': 'csv',
            'starttime': starttime,
            'endtime': endtime,
            'minmagnitude': '5',
            'eventtype':'earthquake'
            }

    response = requests.get(url, params=query)
    if  response.status_code != requests.codes.ok:
        print("USGS requesr fail")
        return
    #print("encoding: %s" % response.encoding)
    quakeList = list(csv.reader(response.text.split('\n'), delimiter=','))
    header=quakeList[0]
    #time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,net,id,updated,place,type,horizontalError,depthError,magError,magNst,status,locationSource,magSource
    quakeList.pop(0) #remove header line
    print('USGS download #',len(quakeList))
    return quakeList, header


def addLocation(quakeData):
    for quake in quakeData:
        #print('quake:',quake)
        if (len(quake)==0): #skip null record
            continue
        quakeTime = quake[0]
        quakeLatitude = quake[1]
        quakeLongitude = quake[2]
        quakeDepth = quake[3]
        quakeMag = quake[4]
        quakeMagType = quake[5]
        quakePlace = quake[13]
        #print(quakeTime, quakeLatitude, quakeLongitude, quakeDepth, quakeMag, quakeMagType, quakePlace)
        locs = quakePlace.split(',')
        #print(locs) #debug for 2013-02-01
        for loc in locs:
            try:
                ofPos = loc.find(" of ")    #do not usr 'of' only to prevent error
                if (ofPos>0):
                    loc=loc[ofPos+4:]
                #loc.replace("the ","")
                loc.replace("the ","")
                #loc.strip() #remove space, strip() fail to use
                while (loc[0]==' '):
                      loc=loc[1:]
                if (loc not in locationList):
                    quake = '{0} {1} {2} {3} {4} {5} {6}'.format(quakeTime, quakeLatitude, quakeLongitude, quakeDepth, quakeMag, quakeMagType, quakePlace)
                    print("Add [",loc, '] from [',quake,']')
                    quakeList.append(quake)
                    locationList.append(loc) 
                    #print(len(quakeList),len(locationList))
            except:
                #source data error
                print('parse fail: ',quakeTime, quakeLatitude, quakeLongitude, quakeDepth, quakeMag, quakeMagType, quakePlace)
               
def saveLocation(locationList, locFileName):
    #print(len(locationList))   
    print("export ",len(locationList)," locations")
    with open(locFileName, 'w') as locfile:
        for i in range(len(locationList)):
            locfile.write('\"'+str(quakeList[i]) + '\"$\"' + str(locationList[i]) + '\"\n') 

def loadLocation(locationList, locFileName):
    if (os.path.exists(locFileName)):
        with open (locFileName, 'r') as locfile:
           for line in locfile:
               quakeData = line.strip().split('$')
               quakeList.append(quakeData[0].strip('\"'))
               locationList.append(quakeData[1].strip('\"'))
        print("import ",len(locationList)," locations")
       
#------main()--------
# Load processed location data
loadLocation(locationList, locFileName) 
#for all date
dt1 = datetime.date(1900, 1, 1)
#end_dt =  datetime.date(2000, 1, 31)
end_dt =  datetime.date.today()
while (dt1<end_dt): # in daterange(start_dt, end_dt):
    """
    # if there are too many records at one download, reduce the download period
    if (dt1 < datetime.date(2018, 1, 1)):
        delta_dt = relativedelta(years=1)
    else:
        delta_dt = relativedelta(months=1)
    """
    delta_dt = relativedelta(years=1)
    dt2 = dt1 + delta_dt
    print("\nProcessing...",dt1.strftime("%Y-%m-%d"),"~",dt2.strftime("%Y-%m-%d"))
    quakeDATA, header = downloadUSGS(dt1, dt2, 4.0, 10)
    addLocation(quakeDATA) 
    dt1 = dt2         

#save processed result
saveLocation(locationList, locFileName)

沒有留言:

張貼留言