AI-Powered Web Scraping with Jina, Google Sheets and OpenAI: the EASY way


Nội dung File JSON

{
    "nodes": [
        {
            "id": "c3ef40df-084e-435c-9a11-3aa0a2f94f36",
            "name": "When clicking \"Test workflow\"",
            "type": "n8n-nodes-base.manualTrigger",
            "position": [
                740,
                520
            ],
            "parameters": {},
            "typeVersion": 1
        },
        {
            "id": "e0583472-a450-4582-83bc-84a014bea543",
            "name": "Split Out",
            "type": "n8n-nodes-base.splitOut",
            "position": [
                1640,
                520
            ],
            "parameters": {
                "options": {},
                "fieldToSplitOut": "output.results"
            },
            "typeVersion": 1
        },
        {
            "id": "b8aa573d-5b63-4669-900f-bcc915b6ad41",
            "name": "Save to Google Sheets",
            "type": "n8n-nodes-base.googleSheets",
            "position": [
                1900,
                520
            ],
            "parameters": {
                "columns": {
                    "value": {},
                    "schema": [
                        {
                            "id": "name",
                            "type": "string",
                            "display": true,
                            "removed": false,
                            "required": false,
                            "displayName": "name",
                            "defaultMatch": false,
                            "canBeUsedToMatch": true
                        },
                        {
                            "id": "price",
                            "type": "string",
                            "display": true,
                            "removed": false,
                            "required": false,
                            "displayName": "price",
                            "defaultMatch": false,
                            "canBeUsedToMatch": true
                        },
                        {
                            "id": "availability",
                            "type": "string",
                            "display": true,
                            "removed": false,
                            "required": false,
                            "displayName": "availability",
                            "defaultMatch": false,
                            "canBeUsedToMatch": true
                        },
                        {
                            "id": "image",
                            "type": "string",
                            "display": true,
                            "removed": false,
                            "required": false,
                            "displayName": "image",
                            "defaultMatch": false,
                            "canBeUsedToMatch": true
                        },
                        {
                            "id": "link",
                            "type": "string",
                            "display": true,
                            "removed": false,
                            "required": false,
                            "displayName": "link",
                            "defaultMatch": false,
                            "canBeUsedToMatch": true
                        }
                    ],
                    "mappingMode": "autoMapInputData",
                    "matchingColumns": [
                        "Book prices"
                    ]
                },
                "options": {},
                "operation": "append",
                "sheetName": {
                    "__rl": true,
                    "mode": "list",
                    "value": 258629074,
                    "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1VDbfi2PpeheD2ZlO6feX3RdMeSsm0XukQlNVW8uVcuo/edit#gid=258629074",
                    "cachedResultName": "Sheet2"
                },
                "documentId": {
                    "__rl": true,
                    "mode": "list",
                    "value": "1VDbfi2PpeheD2ZlO6feX3RdMeSsm0XukQlNVW8uVcuo",
                    "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1VDbfi2PpeheD2ZlO6feX3RdMeSsm0XukQlNVW8uVcuo/edit?usp=drivesdk",
                    "cachedResultName": "Book Prices"
                }
            },
            "credentials": {
                "googleSheetsOAuth2Api": {
                    "id": "GHRceL2SKjXxz0Dx",
                    "name": "Google Sheets account"
                }
            },
            "typeVersion": 4.2
        },
        {
            "id": "a63c3ab3-6aab-43b2-8af6-8b00e24e0ee6",
            "name": "OpenAI Chat Model",
            "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
            "position": [
                1300,
                700
            ],
            "parameters": {
                "options": {}
            },
            "credentials": {
                "openAiApi": {
                    "id": "5oYe8Cxj7liOPAKk",
                    "name": "Derek T"
                }
            },
            "typeVersion": 1
        },
        {
            "id": "40326966-0c46-4df2-8d80-fa014e05b693",
            "name": "Information Extractor",
            "type": "@n8n/n8n-nodes-langchain.informationExtractor",
            "position": [
                1260,
                520
            ],
            "parameters": {
                "text": "={{ $json.data }}",
                "options": {
                    "systemPromptTemplate": "You are an expert extraction algorithm.\nOnly extract relevant information from the text.\nIf you do not know the value of an attribute asked to extract, you may omit the attribute's value.\nAlways output the data in a JSON array called results. Each book should have a title, price, availability, product_url and image_url."
                },
                "schemaType": "manual",
                "inputSchema": "{\n \"results\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"price\": {\n \"type\": \"string\"\n },\n \"title\": {\n \"type\": \"string\"\n },\n \"image_url\": {\n \"type\": \"string\"\n },\n \"product_url\": {\n \"type\": \"string\"\n },\n \"availability\": {\n \"type\": \"string\"\n } \n }\n }\n }\n}"
            },
            "typeVersion": 1
        },
        {
            "id": "8ddca560-8da7-4090-b865-0523f95ca463",
            "name": "Jina Fetch",
            "type": "n8n-nodes-base.httpRequest",
            "position": [
                1020,
                520
            ],
            "parameters": {
                "url": "https://r.jina.ai/http://books.toscrape.com/catalogue/category/books/historical-fiction_4/index.html",
                "options": {
                    "allowUnauthorizedCerts": true
                },
                "authentication": "genericCredentialType",
                "genericAuthType": "httpHeaderAuth"
            },
            "credentials": {
                "httpHeaderAuth": {
                    "id": "ALBmOXmADcPmyHr1",
                    "name": "jina"
                }
            },
            "typeVersion": 4.1
        },
        {
            "id": "b1745cea-fdbe-4f14-b09c-884549beac7e",
            "name": "Sticky Note5",
            "type": "n8n-nodes-base.stickyNote",
            "position": [
                80,
                320
            ],
            "parameters": {
                "color": 5,
                "width": 587,
                "height": 570,
                "content": "## Start here: Step-by-Step YouTube Tutorial :star:\n\n[![AI Powered Web Scraping : the EASY way with n8n and Jina.ai (no-code!)](https://img.youtube.com/vi/f3AJYXHirr8/sddefault.jpg)](https://youtu.be/f3AJYXHirr8)\n\n[Google Sheet Example](https://docs.google.com/spreadsheets/d/1VDbfi2PpeheD2ZlO6feX3RdMeSsm0XukQlNVW8uVcuo/edit?usp=sharing)\n\n\n"
            },
            "typeVersion": 1
        }
    ],
    "pinData": {},
    "connections": {
        "Split Out": {
            "main": [
                [
                    {
                        "node": "Save to Google Sheets",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "Jina Fetch": {
            "main": [
                [
                    {
                        "node": "Information Extractor",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "OpenAI Chat Model": {
            "ai_languageModel": [
                [
                    {
                        "node": "Information Extractor",
                        "type": "ai_languageModel",
                        "index": 0
                    }
                ]
            ]
        },
        "Information Extractor": {
            "main": [
                [
                    {
                        "node": "Split Out",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        },
        "When clicking \"Test workflow\"": {
            "main": [
                [
                    {
                        "node": "Jina Fetch",
                        "type": "main",
                        "index": 0
                    }
                ]
            ]
        }
    }
}