Execute a forEach like a waterfall in async
I'm trying to retrieve the longitude and latitude for a list of addresses with the Google API via a Node.js script. The call itself works fine, but since I have around 100 addresses to submit, I use async.forEach
on an array. The calls are made too fast and I get the error "You have exceeded your rate-limit for this API."
I found that the number of calls is limited to 2500 every 24h and maximum 10 a second. While I'm OK for the 2500 a day, I make my calls way too fast for the rate limit.
I now have to write a function that will delay the calls enough not to reach the limit. Here is a sample of my code:
async.forEach(final_json, function(item, callback) {
  // Build the geocoding request URL for this address.
  var geocodePath = '/maps/api/geocode/json?address='+encodeURIComponent(item.main_address)+'&sensor=false';
  console.log(geocodePath);

  var requestOptions = {
    host: 'maps.googleapis.com',
    port: 80,
    path: geocodePath,
    method: 'GET',
    headers: {
      'Content-Type': 'application/json'
    }
  };

  // a function I have who makes the http GET
  rest.getJSON(requestOptions, function(statusCode, res) {
    console.log(res);
    callback();
  });
}, function() {
  // do something once all the calls have been made
});
How would you proceed to achieve this? I tried putting my rest.getJSON
inside a 100ms setTimeout
but the forEach
iterates through all the rows so fast that it starts all the setTimeout
almost at the same time and therefore it doesn't change anything...
The async.waterfall
looks like it would do the trick, but the thing is I don't know exactly how many rows I will have, so I can't hardcode all the function calls. And to be honest, it would make my code really ugly
The idea is that you can create a rateLimited
function that acts much like a throttled
or debounced
function, except any calls that don't execute immediately get queued and run in order as the rate limit time period expires.
Basically, it creates parallel 1 second intervals that self-manage via timer rescheduling, but only up to perSecondLimit
intervals are allowed.
/**
 * Wraps `fn` so that at most `perSecondLimit` invocations run per second.
 * Excess calls are queued (with their `this` and arguments) and replayed,
 * in order, as one-second slots free up.
 *
 * @param {number} perSecondLimit - Maximum calls allowed per second.
 * @param {Function} fn - The function to rate-limit.
 * @returns {Function} A rate-limited wrapper around `fn`.
 */
function rateLimit(perSecondLimit, fn) {
  // Number of calls currently occupying a one-second slot.
  var activeCount = 0;
  // Deferred invocations: each entry is a [thisArg, argumentsObject] pair.
  var pending = [];

  return function limited() {
    if (activeCount >= perSecondLimit) {
      // Every slot is taken — park this call until one frees up.
      pending.push([this, arguments]);
      return;
    }

    activeCount++;

    // Free this slot a little over a second from now (wiggle room for
    // timer inaccuracy); if anything is queued, feed the next call back
    // through the limiter so it takes the freed slot.
    setTimeout(function() {
      activeCount--;
      var next = pending.shift();
      if (next) {
        limited.apply(next[0], next[1]);
      }
    }, 1010);

    fn.apply(this, arguments);
  };
}
Usage:
function thisFunctionWillBeCalledTooFast() {}
var limitedVersion = rateLimit(10, thisFunctionWillBeCalledTooFast);

// The first 10 calls launch immediately; then, as each slot's timer
// expires, one queued call is launched in its place.
for (var callNum = 0; callNum < 100; callNum++) {
  limitedVersion();
}
Here's how I would hack it (Note: arr
is your array of locations):
/**
 * Geocodes every entry of `arr` by issuing one HTTP GET per address, spaced
 * ~110 ms apart so the 10-requests-per-second Google rate limit is respected.
 *
 * @param {Array} arr - Locations; each entry must have a `main_address` string.
 * @param {Function} callback - Invoked once, after the final response arrives
 *                              (or immediately when `arr` is empty).
 * @param {number} [pos] - Internal cursor into `arr`; omit on the initial call.
 */
function populate(arr, callback, pos) {
  if(typeof pos == "undefined")
    pos=0;
  // BUGFIX: an empty list (or an exhausted cursor) used to crash on
  // `arr[pos].main_address`; finish immediately instead.
  if(pos >= arr.length) {
    callback();
    return;
  }
  // Remember whether this is the final request *before* `pos` is advanced.
  var isLast = (pos === arr.length - 1);
  var path = '/maps/api/geocode/json?address='+encodeURIComponent(arr[pos].main_address)+'&sensor=false';
  console.log(path);
  var options = {
    host: 'maps.googleapis.com',
    port: 80,
    path: path,
    method: 'GET',
    headers: {
      'Content-Type': 'application/json'
    }
  }
  // a function I have who makes the http GET
  rest.getJSON(options, function(statusCode, res) {
    console.log(res);
    // BUGFIX: signal completion only after the last *response* has arrived,
    // not merely after the last request was scheduled — this matches the
    // async.forEach "all calls have been made" semantics the asker wants.
    if(isLast)
      callback();
  });
  pos++;
  if(pos<arr.length)
    setTimeout(function(){
      populate(arr,callback,pos);
    },110); //a little wiggle room since setTimeout isn't exact
}
You could add a rate-limiting function, but, IMHO, it introduces unnecessary complexity. All you really want to do is call the function every tenth of a second or so until you're done with your list, so do that.
It's certainly not as extensible as the alternative, but I'm a fan of simplicity.
Link: http://www.djcxy.com/p/75560.html