1
누구든지 이 코드를 수정하도록 도와줄 수 있습니까? 작년에는 이 스크립트를 문제없이 사용했지만 지금은 URL에 연결하는 데 문제가 있습니다. R 함수를 사용하여 Wunderground에서 과거 기상 데이터를 어떻게 수집합니까?
어떻게 수정합니까?
내가 원하는 것은 기상 관측소 "EKAH"(Tirstrup, Aarhus Airport, Denmark)에서 2015-12-01부터 2016-04-15까지의 데이터를 수집하고 정렬하는 것입니다. 잘못된 URL을 새 URL로 바꾸기만 하면 될 것이라고 생각했습니다.
############## 1) Run function --------------------
#' Fetch one day of observations for a Weather Underground station
#'
#' Builds the daily-history URL for `station` and `date`, downloads it with
#' `getURL()`, and parses the response as comma-separated text.
#'
#' @param station Station identifier placed in the URL path (e.g. "EKAH").
#' @param date A value that `format()` can render with "%Y", "%m" and "%d"
#'   (the caller in this file passes `Date` objects).
#' @return A data frame ordered by its `Time` column with row names
#'   renumbered from 1, or `NULL` when the response has five or fewer lines.
wunder_station_daily <- function(station, date) {
  base_url <- "https://www.wunderground.com/history/airport"
  # Example website:
  # https://www.wunderground.com/history/airport/EKAH/2016/06/09/DailyHistory.html?&MR=1

  # parse date; month and day pass through as.integer(), so they are not
  # zero-padded in the URL
  m <- as.integer(format(date, "%m"))
  d <- as.integer(format(date, "%d"))
  y <- format(date, "%Y")

  # compose final url
  final_url <- paste0(
    base_url,
    "/", station,
    "/", y,
    "/", m,
    "/", d,
    "/DailyHistory.html?&MR=1"
  )

  # NOTE(review): ssl.verifypeer = 0L turns off certificate verification;
  # kept as in the original call.
  # NOTE(review): the parsing below expects comma-separated text with <br>
  # tags on every other line -- confirm this endpoint still serves that.
  response <- getURL(final_url, ssl.verifypeer = 0L, followlocation = 1L)

  # getURL() hands back the whole body as a single string, whereas the
  # line-count check and the index arithmetic below work on one element per
  # line, so split the response into lines first.
  the_data <- unlist(strsplit(response, "\r?\n"), use.names = FALSE)

  # only keep records with more than 5 rows of data
  if (length(the_data) <= 5) {
    return(NULL)
  }

  # remove the first and last lines
  the_data <- the_data[-c(1, length(the_data))]
  # remove odd numbers starting from 3 --> end (the <br> filler lines)
  the_data <- the_data[-seq(3, length(the_data), by = 2)]

  # extract header and cleanup
  the_header <- make.names(strsplit(the_data[1], ",")[[1]])

  # convert to CSV, without header; `text =` avoids managing a
  # textConnection by hand
  the_data <- read.csv(
    text = paste(the_data, collapse = "\n"),
    as.is = TRUE,
    row.names = NULL,
    header = FALSE,
    skip = 1
  )

  # remove the last column, created by trailing comma
  the_data <- the_data[, -ncol(the_data), drop = FALSE]
  # assign column names
  names(the_data) <- the_header

  # convert Time column into properly encoded date time
  the_data$Time <- as.POSIXct(
    strptime(the_data$Time, format = "%Y-%m-%d %H:%M:%S")
  )

  # remove UTC and software type columns
  the_data$DateUTC.br. <- NULL
  the_data$SoftwareType <- NULL

  # sort and fix rownames
  the_data <- the_data[order(the_data$Time), , drop = FALSE]
  row.names(the_data) <- seq_len(nrow(the_data))

  the_data
}
############## 2) Get data for a range of dates ------------------------------
# The fetch function calls getURL() (RCurl) and the stacking step calls
# ldply() (plyr); attach both before anything below runs.
library(RCurl)
library(plyr)

# NOTE(review): the range below covers 2015-12-01 to 2015-12-04 only; change
# `to` if a longer period is wanted.
date.range <- seq.Date(
  from = as.Date("2015-12-01"),
  to = as.Date("2015-12-04"),
  by = "1 day"
)
station <- "EKAH"

# Fetch one data frame per date. lapply() keeps a NULL slot for any day that
# returned nothing, whereas `l[[i]] <- NULL` inside a for loop would delete
# the slot from the list.
l <- lapply(seq_along(date.range), function(i) {
  print(paste0("Fetching data: ", date.range[i]))
  # use the `station` variable rather than repeating the literal id
  wunder_station_daily(station, date.range[i])
})

# stack elements of list into DF, filling missing columns with NA
d <- ldply(l)