Shifter Documentation
  • Getting Started
  • PROXIES
    • Rotating Residential Proxies
    • Static Residential Proxies
  • SCRAPING API
    • Web Scraping API
      • API Request
      • Basic Request
      • Rendering JavaScript
      • Custom Headers
      • Custom Cookies
      • Proxies
      • Geolocation
      • Sessions
      • Forcing Timeouts
      • Binary Files
      • Waiting for CSS
      • Screenshot
      • Extraction Rules
      • JSON Auto Parser
      • Javascript Instructions
      • Disable Stealth
      • POST Requests
      • PUT Requests
      • Conclusion
      • API Errors
  • Google SERP API
    • Search APIs
      • Google Search API
      • Google Product API
      • Google Scholar Cite API
      • Google Scholar Author API
      • Google Scholar Profiles API
      • Google Scholar API
      • Google Autocomplete API
      • Google Maps Photos API
      • Google Maps Reviews API
      • Google Reverse Image API
      • Google Events API
      • Google Finance API
      • Google Play API
      • Google Jobs API
      • Google Local Services API
      • Google Jobs Listing API
      • Google Maps API
      • Google Trends API
        • Google Trends Categories List
        • Geo Parameter Options List
    • API Response
  • Bing SERP API
    • Search API
    • API Parameters
    • Basic API Requests
  • Yandex SERP API
    • Search API
    • Basic API Requests
    • API Parameters
  • Amazon API
    • Getting Started
      • API Parameters
      • Access the API
      • Supported Domains
    • Amazon Search
    • Amazon Seller Products
    • Amazon Seller Profile
    • Amazon Seller Feedback
    • Amazon Product
    • Amazon Category
    • Amazon Bestsellers
    • Amazon Deals
  • Useful Links
    • Github
    • Shifter Homepage
    • Knowledge Base
Powered by GitBook
On this page
  • Possible extraction rules
  • Extraction Rules examples

Was this helpful?

  1. SCRAPING API
  2. Web Scraping API

Extraction Rules

Using the extract_rules parameter, you can get the scraped site based on various extraction rules.

Extraction rules can be applied with both JavaScript rendering enabled or disabled.

Possible extraction rules

Parameter
Required
Details

selector

true

The CSS selector.

output

false

Choose between html, text or @[attr]. Default value is html.

all

false

Set this parameter to "1" to return all the HTML elements. The default value is "0".

Extraction Rules examples

GET https://scrape.shifter.io/v1?api_key=api_key&url=https://example.com&extract_rules={"title": {"selector": "h1", "output": "text"}}

⇡ Input

curl --request GET --url "https://scrape.shifter.io/v1?api_key=api_key&&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D"
const http = require("https");

const options = {
  "method": "GET",
  "hostname": "scrape.shifter.io",
  "port": null,
  "path": "/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D",
  "headers": {}
};

const req = http.request(options, function (res) {
  const chunks = [];

  res.on("data", function (chunk) {
    chunks.push(chunk);
  });

  res.on("end", function () {
    const body = Buffer.concat(chunks);
    console.log(body.toString());
  });
});

req.end();
import http.client

conn = http.client.HTTPSConnection("scrape.shifter.io")

conn.request("GET", "/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D")

res = conn.getresponse()
data = res.read()

print(data.decode("utf-8"))
<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "GET",
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}
package main

import (
	"fmt"
	"net/http"
	"io/ioutil"
)

func main() {

	url := "https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D"

	req, _ := http.NewRequest("GET", url, nil)

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := ioutil.ReadAll(res.Body)

	fmt.Println(res)
	fmt.Println(string(body))

}
HttpResponse<String> response = Unirest.get("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D")
var client = new RestClient("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D");
var request = new RestRequest(Method.GET);
IRestResponse response = client.Execute(request);
require 'uri'
require 'net/http'
require 'openssl'

url = URI("https://scrape.shifter.io/v1?api_key=api_key&url=https%3A%2F%2Fexample.com&extract_rules=%7B%22title%22%3A%20%7B%22selector%22%3A%20%22h1%22%2C%20%22output%22%3A%20%22text%22%7D%7D")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE

request = Net::HTTP::Get.new(url)

response = http.request(request)
puts response.read_body

⇣ Output

{"title": "Example Domain"}
PreviousScreenshotNextJSON Auto Parser

Last updated 1 year ago

Was this helpful?