Rather than go into patching anything, I managed to get this to work:

    // Workaround for servers that reply with a duplicated Content-Length value
    // such as "Content-Length: 1234,1234", which node's HTTP parser rejects.
    // The first chunk of the response is rewritten in place before the parser
    // sees it: the first comma in the header value and everything after it (up
    // to the line terminator) is overwritten with spaces, leaving "1234".
    //
    // Caveats:
    //  - `socket.ondata` is an internal hook, not public API; this was only
    //    reported working on node 0.6.
    //  - Only the first chunk is inspected, so a header that arrives in a later
    //    chunk, or is split across chunks, is not fixed.
    //  - The header name match is case-sensitive.
    //  - NOTE(review): assumes `d` is a Buffer-like object whose
    //    indexOf(value, offset) returns an absolute index -- confirm on the
    //    target node version.
    r.on('request', function (req) {
        req.on('socket', function () {
            var HEADER = "Content-Length:";
            var oldOnData = req.socket.ondata;
            var first_packet = true;
            // d: raw bytes; [start, end) is the window of d holding this chunk.
            req.socket.ondata = function (d, start, end) {
                if (first_packet) {
                    first_packet = false;
                    var pos = d.indexOf(HEADER, start);
                    // Ignore a match that lies outside this chunk's window.
                    if (pos !== -1 && pos + HEADER.length <= end) {
                        var seen_comma = false;
                        // `pos` is already absolute, so `start` must not be
                        // added to it again.
                        for (var i = pos + HEADER.length; i < end && d[i] !== 0x0a; i++) {
                            if (d[i] === 0x2c) { // ','
                                seen_comma = true;
                            }
                            // Blank the comma and what follows, but keep the
                            // '\r' so the CRLF line terminator stays intact.
                            if (seen_comma && d[i] !== 0x0d) {
                                d[i] = 0x20; // set to space
                            }
                        }
                    }
                }
                return oldOnData.apply(req.socket, arguments);
            };
        });
    });

Hacky and a bit nasty, but works, at least with node 0.6 (have to check if
the same process applies on 0.8).


On Tue, Jan 8, 2013 at 3:18 PM, Marcel Laverdet <[email protected]> wrote:

> Apply this patch:
> https://gist.github.com/4487528
>
> Node shouldn't be barfing on anything a browser can display and should
> really be more tolerant of these failures. I should submit a PR.. but not
> sure if this will cause other issues down the road.
>
> On Tue, Jan 8, 2013 at 12:42 PM, Matt <[email protected]> wrote:
>
>> We're doing web scraping using node and coming across an issue that we
>> cannot fetch a particular URL on a particular web site, because it sends
>> back: "Content-Length: 1234,1234"
>>
>>  I totally understand that node's http parser doesn't deal with this, and
>> throws an error, but is there any way we can intercept this and fix it up?
>> The only way I can think of is using a proxy written in another language,
>> which seems like a sucky solution.
>>
>> Thoughts?
>>
>> Here's some test code to demonstrate this:
>>
>> var assert = require('assert');
>> var http = require('http');
>>
>> var seen_req = false;
>>
>> var server = http.createServer(function(req, res) {
>>   assert.equal('GET', req.method);
>>   assert.equal('/foo?bar', req.url);
>>   res.writeHead(200, {'Content-Type': 'text/plain', 'Content-Length':
>> '6,6'});
>>   res.write('hello\n');
>>   res.end();
>>   server.close();
>>   seen_req = true;
>> });
>>
>> server.listen(12345, function() {
>>   http.get('http://127.0.0.1:' + 12345 + '/foo?bar');
>> });
>>
>> process.on('exit', function() {
>>   assert(seen_req);
>> });
>>
>>  --
>> Job Board: http://jobs.nodejs.org/
>> Posting guidelines:
>> https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
>> You received this message because you are subscribed to the Google
>> Groups "nodejs" group.
>> To post to this group, send email to [email protected]
>> To unsubscribe from this group, send email to
>> [email protected]
>> For more options, visit this group at
>> http://groups.google.com/group/nodejs?hl=en?hl=en
>>
>
>  --
> Job Board: http://jobs.nodejs.org/
> Posting guidelines:
> https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
> You received this message because you are subscribed to the Google
> Groups "nodejs" group.
> To post to this group, send email to [email protected]
> To unsubscribe from this group, send email to
> [email protected]
> For more options, visit this group at
> http://groups.google.com/group/nodejs?hl=en?hl=en
>

-- 
Job Board: http://jobs.nodejs.org/
Posting guidelines: 
https://github.com/joyent/node/wiki/Mailing-List-Posting-Guidelines
You received this message because you are subscribed to the Google
Groups "nodejs" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/nodejs?hl=en

Reply via email to