diff options
| author | Petter Reinholdtsen <pere@hungry.com> | 2015-02-12 22:40:10 +0100 | 
|---|---|---|
| committer | Petter Reinholdtsen <pere@hungry.com> | 2015-02-12 22:40:10 +0100 | 
| commit | 4010c1ece08cd3297830fdf8cd7dcb8eb57ca559 (patch) | |
| tree | a5a19cf938886fd1849a62178d6dc4b54eb5fbd3 | |
Start on framework.
| -rw-r--r-- | README | 6 | ||||
| -rw-r--r-- | netsniff.js | 143 | ||||
| -rwxr-xr-x | runcheck | 7 | ||||
| -rw-r--r-- | testurls.txt | 5 | 
4 files changed, 161 insertions, 0 deletions
| @@ -0,0 +1,6 @@ +Extract HAR formatted information for Norwegian web sites +========================================================= + +Using PhantomJS and the +https://github.com/ariya/phantomjs/blob/master/examples/netsniff.js +script. diff --git a/netsniff.js b/netsniff.js new file mode 100644 index 0000000..b702543 --- /dev/null +++ b/netsniff.js @@ -0,0 +1,143 @@ +if (!Date.prototype.toISOString) { +    Date.prototype.toISOString = function () { +        function pad(n) { return n < 10 ? '0' + n : n; } +        function ms(n) { return n < 10 ? '00'+ n : n < 100 ? '0' + n : n } +        return this.getFullYear() + '-' + +            pad(this.getMonth() + 1) + '-' + +            pad(this.getDate()) + 'T' + +            pad(this.getHours()) + ':' + +            pad(this.getMinutes()) + ':' + +            pad(this.getSeconds()) + '.' + +            ms(this.getMilliseconds()) + 'Z'; +    } +} + +function createHAR(address, title, startTime, resources) +{ +    var entries = []; + +    resources.forEach(function (resource) { +        var request = resource.request, +            startReply = resource.startReply, +            endReply = resource.endReply; + +        if (!request || !startReply || !endReply) { +            return; +        } + +        // Exclude Data URI from HAR file because +        // they aren't included in specification +        if (request.url.match(/(^data:image\/.*)/i)) { +            return; +	} + +        entries.push({ +            startedDateTime: request.time.toISOString(), +            time: endReply.time - request.time, +            request: { +                method: request.method, +                url: request.url, +                httpVersion: "HTTP/1.1", +                cookies: [], +                headers: request.headers, +                queryString: [], +                headersSize: -1, +                bodySize: -1 +            }, +            response: { +                status: endReply.status, +                statusText: endReply.statusText, +                httpVersion: "HTTP/1.1", +                cookies: [], +                headers: endReply.headers, +                redirectURL: "", +                headersSize: -1, +                bodySize: startReply.bodySize, +                content: { +                    size: startReply.bodySize, +                    mimeType: endReply.contentType +                } +            }, +            cache: {}, +            timings: { +                blocked: 0, +                dns: -1, +                connect: -1, +                send: 0, +                wait: startReply.time - request.time, +                receive: endReply.time - startReply.time, +                ssl: -1 +            }, +            pageref: address +        }); +    }); + +    return { +        log: { +            version: '1.2', +            creator: { +                name: "PhantomJS", +                version: phantom.version.major + '.' + phantom.version.minor + +                    '.' + phantom.version.patch +            }, +            pages: [{ +                startedDateTime: startTime.toISOString(), +                id: address, +                title: title, +                pageTimings: { +                    onLoad: page.endTime - page.startTime +                } +            }], +            entries: entries +        } +    }; +} + +var page = require('webpage').create(), +    system = require('system'); + +if (system.args.length === 1) { +    console.log('Usage: netsniff.js <some URL>'); +    phantom.exit(1); +} else { + +    page.address = system.args[1]; +    page.resources = []; + +    page.onLoadStarted = function () { +        page.startTime = new Date(); +    }; + +    page.onResourceRequested = function (req) { +        page.resources[req.id] = { +            request: req, +            startReply: null, +            endReply: null +        }; +    }; + +    page.onResourceReceived = function (res) { +        if (res.stage === 'start') { +            page.resources[res.id].startReply = res; +        } +        if (res.stage === 'end') { +            page.resources[res.id].endReply = res; +        } +    }; + +    page.open(page.address, function (status) { +        var har; +        if (status !== 'success') { +            console.log('FAIL to load the address'); +            phantom.exit(1); +        } else { +            page.endTime = new Date(); +            page.title = page.evaluate(function () { +                return document.title; +            }); +            har = createHAR(page.address, page.title, page.startTime, page.resources); +            console.log(JSON.stringify(har, undefined, 4)); +            phantom.exit(); +        } +    }); +} diff --git a/runcheck b/runcheck new file mode 100755 index 0000000..b9f54fc --- /dev/null +++ b/runcheck @@ -0,0 +1,7 @@ +#!/bin/sh + +cat testurls.txt | while read url ; do +    filename=$(echo "$url" | cut -d/ -f3-| sed 's%/$%%' |tr / %) +    echo "$url" +    phantomjs netsniff.js "$url" > "har-data/$filename.har" +done diff --git a/testurls.txt b/testurls.txt new file mode 100644 index 0000000..05e6e88 --- /dev/null +++ b/testurls.txt @@ -0,0 +1,5 @@ +http://www.nrk.no/ +http://www.stortinget.no/ +http://www.aftenposten.no/ +http://www.vg.no/ +http://www.usit.uio.no/ | 
