Forcing Timeouts
When using a browser to make requests, some URLs tend to take a long time to load because an esoteric part of the DOM is still loading. For that reason, Shifter's Scraping API returns all the HTML that could be gathered up to the moment the timeout was triggered.
The example on this page demonstrates how to force a timeout by setting the timeout parameter, which is expressed in milliseconds (timeout=3000 corresponds to 3 seconds). The maximum value that can be set for this parameter is 60000 (60 seconds). The requests below deliberately use timeout=200 so that the timeout is triggered and the resulting failure response can be shown.
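As a quick illustration of the 3-second case described above, the sketch below passes timeout=3000 to the same /v1 endpoint using Python's standard http.client module; the api_key value and target URL are placeholders, and this is a minimal sketch rather than a complete client.

import http.client
import urllib.parse

# Placeholder values; substitute your own API key and target page.
params = urllib.parse.urlencode({
    "api_key": "api_key",
    "url": "https://httpbin.org/get",
    "render_js": 1,
    "timeout": 3000,  # milliseconds; the maximum accepted value is 60000
})

conn = http.client.HTTPSConnection("scrape.shifter.io")
conn.request("GET", "/v1?" + params)

res = conn.getresponse()
print(res.read().decode("utf-8"))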
Forcing Timeouts examples
You can specify the maximum time the rendering engine is allowed to spend on a page by passing the timeout parameter.
GET https://scrape.shifter.io/v1?api_key=api_key&url=https://httpbin.org/get&render_js=1&timeout=200
⇡ Input
cURL

curl --request GET --url "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200"

Node.js

const http = require("https");

const options = {
  "method": "GET",
  "hostname": "scrape.shifter.io",
  "port": null,
  "path": "/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200",
  "headers": {}
};

const req = http.request(options, function (res) {
  const chunks = [];

  res.on("data", function (chunk) {
    chunks.push(chunk);
  });

  res.on("end", function () {
    const body = Buffer.concat(chunks);
    console.log(body.toString());
  });
});

req.end();

Python

import http.client

conn = http.client.HTTPSConnection("scrape.shifter.io")
conn.request("GET", "/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")

res = conn.getresponse()
data = res.read()

print(data.decode("utf-8"))

PHP

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

Go

package main

import (
  "fmt"
  "io/ioutil"
  "net/http"
)

func main() {
  url := "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200"

  req, _ := http.NewRequest("GET", url, nil)
  res, _ := http.DefaultClient.Do(req)

  defer res.Body.Close()
  body, _ := ioutil.ReadAll(res.Body)

  fmt.Println(res)
  fmt.Println(string(body))
}

Java

// Requires the Unirest HTTP client library
HttpResponse<String> response = Unirest.get("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")
  .asString();

C#

// Requires the RestSharp library
var client = new RestClient("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);

Ruby

require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body

⇣ Output
{
  "status": "Failure",
  "status_code": 422,
  "created_at": "2022-04-26T11:57:23.242Z",
  "processed_at": "2022-04-26T11:57:23.739Z",
  "time_taken": {
    "total": 0.701,
    "scraping": 0.202,
    "setup_worker": 0.403
  },
  "error": "The target page took more than 0.2 seconds to load, the website might be down. Retry the request or increase the value of 'timeout' parameter.",
  "page_content": null
}
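Because the failure response above carries a status_code of 422 and an error message that explicitly suggests retrying or increasing the timeout value, a client can branch on that and retry with a longer timeout. The sketch below is illustrative only: it assumes the HTTP status of the response matches the status_code field shown in the sample output, and it uses a placeholder api_key and target URL.

import http.client
import json
import urllib.parse

def scrape(timeout_ms):
    """Call the Scraping API with the given timeout, expressed in milliseconds."""
    params = urllib.parse.urlencode({
        "api_key": "api_key",              # placeholder API key
        "url": "https://httpbin.org/get",  # placeholder target page
        "render_js": 1,
        "timeout": timeout_ms,
    })
    conn = http.client.HTTPSConnection("scrape.shifter.io")
    conn.request("GET", "/v1?" + params)
    res = conn.getresponse()
    return res.status, res.read().decode("utf-8")

# First attempt with a deliberately short timeout, mirroring the example above.
status, body = scrape(200)

if status == 422:
    # Assumes the HTTP status matches the "status_code" field of the failure
    # JSON shown above; retry with a longer timeout, as the error suggests.
    print(json.loads(body).get("error"))
    status, body = scrape(3000)

print(body)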