Asynchronous call during a synchronous CasperJS operation

After a lot of trouble (this is my first time using Node.js and CasperJS/PhantomJS), it started to work. I made this work with curl (PHP).

This is what I am trying to accomplish:

  • Log in
  • Get all units
  • parse their details
  • (my problem) 2 unit details are provided by ajax calls
  • casper.start(url, function() {
          this.evaluate(function() {
              document.querySelector("input[name='username']").value = "username";
              document.querySelector("input[name='password']").value = "passwrd";
              document.querySelector("#login").click();
         });
         console.log("Logged in..");
    });
    
    var processPage = function() {
        console.log("Get all units..");
        var units = this.evaluate(getUnits);
        allUnits.push(units);
    
        if (!this.evaluate(isLastPage)) {
            this.thenClick('.paging li:last-child a').then(function() {
                currentPage++;
                console.log("Stepping to page.. " + currentPage);
                this.waitFor(function() {
                    return currentPage === this.evaluate(getSelectedPage);
                }, processPage, terminate);
            });
        } else{
            require('utils').dump(allUnits);
            casper.then(function() {
                 this.capture('test.png');
            });
            console.log("Total unit count: " + allUnits.length);
        }
    };
    
    
    
     // Wait for an element matching '.units', then hand control to processPage;
     // terminate is passed as the third (timeout) callback and is defined elsewhere.
     casper.waitForSelector('.units', processPage, terminate);
     // Start executing the steps queued above.
     casper.run();
    

    In the following function I parse the rows. I also want to add the 2 details that are fetched by Ajax (asynchronously), but I don't know how to do it.

    /**
     * Runs inside the page context (it is passed to casper.evaluate) and builds
     * one plain object per element matching ".units".
     *
     * @returns {Array<Object>} one `{ id: ... }` object per row; the id is the
     *     third "/"-separated segment of the href of the row's first link.
     */
    function getUnits() {
        var rows = document.querySelectorAll('.units');
        var units = [];

        for (var i = 0; i < rows.length; i++) {
            var aID = rows[i].querySelector('a').getAttribute('href').split('/');
            // A fresh object per row: `unit` was previously never declared, so
            // the first assignment threw a ReferenceError.
            var unit = {};
            unit['id'] = aID[2];
            //add other details for the unit

            // TODO: do an async call to the 2 external links with the ID and
            // add the details to the unit

            units.push(unit);
        }

        return units;
    }
    

    It is important to note that, at the end, I want to run another function on the units, and all of their details must already have been fetched before it runs.

    EDIT

    After login, the page shows a table, and I am reading that table. Its columns are:

  • ID
  • OWNER
  • STREET
  • BEINGFOLLOWED (automatic ajax call that is done to a link with post)
  • PLACEDABIDON (automatic ajax call that is done to a link with post)
  • I tried to get the last 2 fields with Casper in the normal way, but sometimes it got the value and sometimes it didn't (the request is sometimes too slow)

    What I want to know is how to get these fields without waiting on each row (unit) until it has received its values. (Every unit should fetch its own values and fill them into its own object, so a callback is probably needed?)

    Alternatively, I can make the request myself: I just need the ID and the cookie to do the POST (the link takes the ID and the cookie as parameters), fetch the details and fill them in. But I don't know how to do that, whether the first solution works better, or even whether this is possible.

    The most important thing is that the app's logic continues only after all units have their details.


    Since PhantomJS (and CasperJS) has two contexts, it is easy to break out of the execution flow.

    I see two ways to solve your problem.

    1. Send the request yourself

    You need to trigger the requests inside of the page context (inside of evaluate() ) and let the outer context wait for the result. I assume that you can have a success callback in the page context.

    You have to put the result of the external requests somewhere global so that the outer context can get to it. For example, modify your getUnits() function like this:

    /**
     * Runs inside the page context (it is passed to casper.evaluate). Builds one
     * unit object per element matching ".units" and starts two asynchronous GET
     * requests per unit.
     *
     * Responses are stored in window.__externalRequestResults, a pair of arrays:
     * [0][i] receives the response text of the first request for row i and
     * [1][i] that of the second. Entries are written by row index, so they stay
     * aligned with the returned units regardless of response order.
     *
     * The helper is nested because a function handed to evaluate() must be
     * self-contained. `someURLwithParameters` and `someOtherURLwithParameters`
     * are placeholders for the two real URLs.
     *
     * @returns {Array<Object>} the immediately available `{ id: ... }` objects.
     */
    function getUnits() {
        var rows = document.querySelectorAll('.units');
        var units = [];
        var results = [[], []];
        window.__externalRequestResults = results;

        // Starts one request; each call has its own `xhr`, so the handler reads
        // the state of the request it belongs to, and every request is sent.
        function requestDetail(url, slot, index) {
            var xhr = new XMLHttpRequest();
            xhr.open("GET", url, true);
            xhr.onreadystatechange = function() {
                if (xhr.readyState === 4) { // DONE
                    results[slot][index] = xhr.responseText;
                }
            };
            xhr.send();
        }

        for (var i = 0; i < rows.length; i++) {
            var aID = rows[i].querySelector('a').getAttribute('href').split('/');
            // A fresh object per row; `unit` was previously never declared.
            var unit = {};
            unit['id'] = aID[2];
            //add other details for the unit

            //Do a async call to the 2 external links with the ID and add the details to the unit
            requestDetail(someURLwithParameters, 0, i);
            requestDetail(someOtherURLwithParameters, 1, i);

            units.push(unit);
        }

        return units;
    }
    

    Now you can retrieve the immediate results and then wait for additional results:

    /**
     * Collects the immediately available unit data of the current page, then
     * waits until both external result lists hold a value for every unit before
     * storing everything in `allUnits`.
     *
     * Must be invoked with the casper instance as `this`. Relies on the outer
     * `allUnits`, `getUnits`, `isLastPage` and `terminate` names.
     */
    var processPage = function() {
        console.log("Get all units..");
        var units = this.evaluate(getUnits);
        // getUnits starts exactly one pair of requests per returned unit, so the
        // number of units (not a separate row-count selector) bounds the wait.
        var expectedCount = units ? units.length : 0;
        var externalUnits;
        this.waitFor(function test(){
            externalUnits = this.getGlobal("__externalRequestResults");
            // The global may not be readable yet; keep waiting instead of throwing.
            if (!externalUnits || !externalUnits[0] || !externalUnits[1]) {
                return false;
            }
            for (var i = 0; i < expectedCount; i++) {
                // `== null` also matches the holes of a partially filled list.
                if (externalUnits[0][i] == null || externalUnits[1][i] == null) {
                    return false;
                }
            }
            return true;
        }, function _then(){
            allUnits.push(units);
            allUnits.push(externalUnits); // TODO: maybe a little differently

            if (!this.evaluate(isLastPage)) {
                //... as before
            } else{
                //... as before
            }
        }, terminate);
    };
    

    The waiting period ends once both lists of additional data hold an entry for every row of the table. The results are stored by row index, which creates a sparse array while responses are still outstanding; Array#push() cannot be used, because the Ajax responses may arrive in a different order than the one in which the requests were sent.

    2. Let the browser handle the request

    In the second case you let CasperJS wait for all data to come in. The challenge is to write a check function that does this.

    /**
     * Waits until the 4th and 5th cell of every table row contain non-blank
     * text, then collects the units of the page into `allUnits`.
     *
     * Must be invoked with the casper instance as `this`; the two callbacks are
     * handed to waitFor and use `this` the same way. Relies on the outer
     * `allUnits`, `getUnits`, `isLastPage` and `terminate` names.
     */
    var processPage = function() {
        console.log("Get all units..");
        var rowCount = this.getElementsInfo("table tr").length; // TODO: fix selector

        // Wait condition: false as soon as one row still has a blank 4th or 5th cell.
        function allRowsFilled() {
            var rowNumber = 1;
            while (rowNumber <= rowCount) {
                var rowSelector = "table tr:nth-child(" + rowNumber + ")";
                var fourthCell = this.fetchText(rowSelector + " td:nth-child(4)") || "";
                var fifthCell = this.fetchText(rowSelector + " td:nth-child(5)") || "";
                if (fourthCell.trim() === "" || fifthCell.trim() === "") {
                    return false;
                }
                rowNumber += 1;
            }
            return true;
        }

        // Runs once every row is complete.
        function collectUnits() {
            allUnits.push(this.evaluate(getUnits));

            if (!this.evaluate(isLastPage)) {
                //... as before
            } else {
                //... as before
            }
        }

        this.waitFor(allRowsFilled, collectUnits, terminate);
    };
    

    Now, you don't even need to make Ajax requests inside getUnits() and can simply collect all the static information.

    Don't forget to make the failed waiting timeout big enough that all Ajax requests complete in time. For example 3 or 4 times larger than the normal time for all ajax requests to load. You can use the global casper.options.waitTimeout for that.

    链接地址: http://www.djcxy.com/p/73486.html

    上一篇: Casperjs web服务多线程

    下一篇: 在同步CasperJS操作期间进行异步调用