Forcing Timeouts
When using a browser to make requests, some URLs tend to take a long time to load because an esoteric part of the DOM is still loading. For that reason, Shifter's Scraping API returns all the HTML that could be gathered up to the moment the timeout was triggered.
The example on this page demonstrates how to force a timeout by setting the timeout parameter, which is expressed in milliseconds (timeout=3000 corresponds to 3 seconds). The maximum value that can be set for this parameter is 60000 (60 seconds). The requests below deliberately use timeout=200 so that the timeout is triggered and the resulting failure response can be shown.
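As a quick illustration of the 3-second case described above, the sketch below passes timeout=3000 to the same /v1 endpoint using Python's standard http.client module; the api_key value and target URL are placeholders, and this is a minimal sketch rather than a complete client.

import http.client
import urllib.parse

# Placeholder values; substitute your own API key and target page.
params = urllib.parse.urlencode({
    "api_key": "api_key",
    "url": "https://httpbin.org/get",
    "render_js": 1,
    "timeout": 3000,  # milliseconds; the maximum accepted value is 60000
})

conn = http.client.HTTPSConnection("scrape.shifter.io")
conn.request("GET", "/v1?" + params)

res = conn.getresponse()
print(res.read().decode("utf-8"))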
Forcing Timeouts examples
You can specify the maximum time the rendering engine is allowed to spend on a page by passing the timeout parameter.
GET https://scrape.shifter.io/v1?api_key=api_key&url=https://httpbin.org/get&render_js=1&timeout=200
⇡ Input
cURL

curl --request GET --url "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200"

Node.js

const http = require("https");

const options = {
  "method": "GET",
  "hostname": "scrape.shifter.io",
  "port": null,
  "path": "/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200",
  "headers": {}
};

const req = http.request(options, function (res) {
  const chunks = [];

  res.on("data", function (chunk) {
    chunks.push(chunk);
  });

  res.on("end", function () {
    const body = Buffer.concat(chunks);
    console.log(body.toString());
  });
});

req.end();

Python

import http.client

conn = http.client.HTTPSConnection("scrape.shifter.io")
conn.request("GET", "/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")

res = conn.getresponse()
data = res.read()

print(data.decode("utf-8"))

PHP

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

Go

package main

import (
  "fmt"
  "io/ioutil"
  "net/http"
)

func main() {
  url := "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200"

  req, _ := http.NewRequest("GET", url, nil)
  res, _ := http.DefaultClient.Do(req)

  defer res.Body.Close()
  body, _ := ioutil.ReadAll(res.Body)

  fmt.Println(res)
  fmt.Println(string(body))
}

Java

// Requires the Unirest HTTP client library
HttpResponse<String> response = Unirest.get("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")
  .asString();

C#

// Requires the RestSharp library
var client = new RestClient("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);

Ruby

require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fhttpbin.org%2Fget&render_js=1&timeout=200")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body

⇣ Output
{
  "status": "Failure",
  "status_code": 422,
  "created_at": "2022-04-26T11:57:23.242Z",
  "processed_at": "2022-04-26T11:57:23.739Z",
  "time_taken": {
    "total": 0.701,
    "scraping": 0.202,
    "setup_worker": 0.403
  },
  "error": "The target page took more than 0.2 seconds to load, the website might be down. Retry the request or increase the value of 'timeout' parameter.",
  "page_content": null
}
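Because the failure response above carries a status_code of 422 and an error message that explicitly suggests retrying or increasing the timeout value, a client can branch on that and retry with a longer timeout. The sketch below is illustrative only: it assumes the HTTP status of the response matches the status_code field shown in the sample output, and it uses a placeholder api_key and target URL.

import http.client
import json
import urllib.parse

def scrape(timeout_ms):
    """Call the Scraping API with the given timeout, expressed in milliseconds."""
    params = urllib.parse.urlencode({
        "api_key": "api_key",              # placeholder API key
        "url": "https://httpbin.org/get",  # placeholder target page
        "render_js": 1,
        "timeout": timeout_ms,
    })
    conn = http.client.HTTPSConnection("scrape.shifter.io")
    conn.request("GET", "/v1?" + params)
    res = conn.getresponse()
    return res.status, res.read().decode("utf-8")

# First attempt with a deliberately short timeout, mirroring the example above.
status, body = scrape(200)

if status == 422:
    # Assumes the HTTP status matches the "status_code" field of the failure
    # JSON shown above; retry with a longer timeout, as the error suggests.
    print(json.loads(body).get("error"))
    status, body = scrape(3000)

print(body)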