Awesome, this approach totally works, thank you very much:



worker_processes  auto;

error_log  /var/log/nginx/error.log warn;
pid        /var/run/nginx.pid;

events {
    worker_connections  1024;
}

http {
    default_type  application/octet-stream;

    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';

    access_log  /var/log/nginx/access.log  main;

    sendfile        on;

    keepalive_timeout  65;

    gzip on;
    gzip_types application/javascript;
    gzip_buffers 32 8k;


    map $http_user_agent $limit_bots {
      default 0;
      ~*(google|bing|yandex|msnbot) 1;
      ~*(AltaVista|Googlebot|Slurp|BlackWidow|Bot|ChinaClaw|Custo|DISCo|Download|Demon|eCatch|EirGrabber|EmailSiphon|EmailWolf|SuperHTTP|Surfbot|WebWhacker) 1;
      ~*(Express|WebPictures|ExtractorPro|EyeNetIE|FlashGet|GetRight|GetWeb!|Go!Zilla|Go-Ahead-Got-It|GrabNet|Grafula|HMView) 1;
      ~*(rafula|HMView|HTTrack|Stripper|Sucker|Indy|InterGET|Ninja|JetCar|Spider|larbin|LeechFTP|Downloader|tool|Navroad|NearSite|NetAnts|tAkeOut|WWWOFFLE) 1;
      ~*(GrabNet|NetSpider|Vampire|NetZIP|Octopus|Offline|PageGrabber|Foto|pavuk|pcBrowser|RealDownload|ReGet|SiteSnagger|SmartDownload|SuperBot|WebSpider) 1;
      ~*(Teleport|VoidEYE|Collector|WebAuto|WebCopier|WebFetch|WebGo|WebLeacher|WebReaper|WebSauger|eXtractor|Quester|WebStripper|WebZIP|Wget|Widow|Zeus) 1;
      ~*(Twengabot|htmlparser|libwww|Python|perl|urllib|scan|Curl|email|PycURL|Pyth|PyQ|WebCollector|WebCopy|webcraw) 1;
    }

    server {
        listen       8080;
        server_name  localhost;

        root   /usr/share/nginx/html;

        server_tokens off;

        if ($limit_bots = 1){ rewrite ^ /puppeteer/download/html/ break; }

        location = /puppeteer/download/html/ {
          internal;
          proxy_pass http://localhost:3000;
          proxy_method GET;
          proxy_set_header Content-Type "application/json";
          proxy_pass_request_body off;
          proxy_set_body "{\"url\":\"https://example.com$request_uri\"}"; # $request_uri already starts with "/"
        }

        location ~ /index\.html|.*\.json$ {  # Don't cache index.html and *.json files
          expires -1;
          add_header Cache-Control 'no-store, no-cache, must-revalidate, proxy-revalidate, max-age=0';
          include /etc/nginx/security-headers.conf;
        }

        location ~ .*\.css$|.*\.js$ {
          add_header Cache-Control 'max-age=31449600'; # one year, as we don't care about these files because of cache busting
          include /etc/nginx/security-headers.conf;
        }

        location / {
          try_files $uri$args $uri$args/ /index.html;   # Serves index.html for all non-existing files. TODO: Is this what we want?
          add_header Cache-Control 'max-age=86400'; # one day
          include /etc/nginx/security-headers.conf;
        }
    }
}
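
One note on the repeated "include /etc/nginx/security-headers.conf;" lines: that file
isn't shown in the post, so its contents are up to you. A minimal sketch of what it might
contain (the header names and values here are only an assumption, not part of the config
above):

# /etc/nginx/security-headers.conf -- hypothetical example, adjust to your own policy
add_header X-Content-Type-Options "nosniff" always;
add_header X-Frame-Options "SAMEORIGIN" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;

Repeating the include per location is deliberate: add_header directives are only inherited
from the enclosing level when a location defines none of its own, so every location that
sets Cache-Control has to pull the security headers in again.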
