Rather than go into patching anything, I managed to get this to work:
// Workaround for servers that reply with a malformed header such as
// "Content-Length: 1234,1234". The first chunk of raw response bytes is
// patched in place before the socket's original ondata handler (and thus the
// HTTP parser behind it) sees it: the first comma in the header value, and
// everything after it up to the end of that line, is overwritten with spaces.
//
// NOTE(review): `r` is defined outside this snippet; it is assumed to emit a
// 'request' event carrying the outgoing client request -- confirm.
// NOTE(review): `socket.ondata` is not a documented API; the author reports
// this working on node 0.6 only.
r.on('request', function (req) {
    req.on('socket', function () {
        var HEADER = "Content-Length:";
        var oldOnData = req.socket.ondata;
        // Only the first chunk is inspected. Presumably the headers arrive in
        // it; a header block split across chunks would not be patched.
        var first_packet = true;

        // d is the raw chunk; [start, end) is the slice of d that holds data.
        req.socket.ondata = function (d, start, end) {
            if (first_packet) {
                first_packet = false;
                // The match is case-sensitive, so "content-length:" is missed.
                // NOTE(review): assumes d.indexOf follows the usual
                // (value, fromIndex) contract -- confirm on the target runtime.
                var pos = d.indexOf(HEADER, start);
                if (pos !== -1) {
                    var seen_comma = false;
                    // pos is already an index into d (the search merely began
                    // at `start`), so `start` must not be added a second time.
                    var i = pos + HEADER.length;
                    // Walk the header value up to the terminating LF (0x0a).
                    while (i < end && d[i] !== 0x0a) {
                        console.log("Saw: " + String.fromCharCode(d[i]) + " (" + d[i] + ") at pos: " + i, "blue");
                        if (d[i] === 44) { // 44 is ','
                            seen_comma = true;
                        }
                        if (seen_comma) {
                            d[i] = 32; // set to space
                        }
                        i++;
                    }
                }
            }
            // Always hand the (possibly patched) chunk to the original handler.
            return oldOnData.apply(req.socket, arguments);
        };
    });
});
Hacky and a bit nasty, but works, at least with node 0.6 (have to check if
the same process applies on 0.8).
On Tue, Jan 8, 2013 at 3:18 PM, Marcel Laverdet <[email protected]> wrote:
> Apply this patch:
> https://gist.github.com/4487528
>
> Node shouldn't be barfing on anything a browser can display and should
> really be more tolerant of these failures. I should submit a PR, but I'm
> not sure whether this will cause other issues down the road.
>
> On Tue, Jan 8, 2013 at 12:42 PM, Matt <[email protected]> wrote:
>
>> We're doing web scraping using node and have come across an issue: we
>> cannot fetch a particular URL on a particular web site, because it sends
>> back: "Content-Length: 1234,1234"
>>
>> I totally understand that node's http parser doesn't deal with this, and
>> throws an error, but is there any way we can intercept this and fix it up?
>> The only way I can think of is using a proxy written in another language,
>> which seems like a sucky solution.
>>
>> Thoughts?
>>
>> Here's some test code to demonstrate this:
>>
>> var assert = require('assert');
>> var http = require('http');
>>
>> var seen_req = false;
>>
>> var server = http.createServer(function(req, res) {
>> assert.equal('GET', req.method);
>> assert.equal('/foo?bar', req.url);
>> res.writeHead(200, {'Content-Type': 'text/plain', 'Content-Length':
>> '6,6'});
>> res.write('hello\n');
>> res.end();
>> server.close();
>> seen_req = true;
>> });
>>
>> server.listen(12345, function() {
>> http.get('http://127.0.0.1:' + 12345 + '/foo?bar');
>> });
>>
>> process.on('exit', function() {
>> assert(seen_req);
>> });
>>
>> --
>> Job Board: http://jobs.nodejs.org/
>> Posting guidelines:
>> https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
>> You received this message because you are subscribed to the Google
>> Groups "nodejs" group.
>> To post to this group, send email to [email protected]
>> To unsubscribe from this group, send email to
>> [email protected]
>> For more options, visit this group at
>> http://groups.google.com/group/nodejs?hl=en?hl=en
>>
>
> --
> Job Board: http://jobs.nodejs.org/
> Posting guidelines:
> https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
> You received this message because you are subscribed to the Google
> Groups "nodejs" group.
> To post to this group, send email to [email protected]
> To unsubscribe from this group, send email to
> [email protected]
> For more options, visit this group at
> http://groups.google.com/group/nodejs?hl=en?hl=en
>
--
Job Board: http://jobs.nodejs.org/
Posting guidelines:
https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
You received this message because you are subscribed to the Google
Groups "nodejs" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/nodejs?hl=en?hl=en