Retailer RESTful API
post
http://localhost:8081
/apis/tasks
Receive Tasks
Schema
Example
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://www.bitsky.ai/schemas/task.json",
"type": "object",
"title": "Task Schema",
"version": "1.0.0",
"required": ["retailer", "url"],
"additionalProperties": false,
"properties": {
"system": {
"type": "object",
"description": "The following properties are updated by the system; users cannot update them directly",
"additionalProperties": false,
"properties": {
"state": {
"title": "State",
"type": "string",
"description": "State of this task",
"enum": [
"DRAFT",
"CONFIGURED",
"FINISHED",
"RUNNING",
"FAILED",
"PAUSED",
"TIMEOUT"
]
},
"securityKey": {
"$ref": "#/definitions/securityKey"
},
"created": {
"$ref": "#/definitions/created"
},
"modified": {
"$ref": "#/definitions/modified"
},
"startedAt": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"title": "Task started at"
}
]
},
"endedAt": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"title": "Task finished at"
}
]
},
"producer": {
"title": "Execute Producer",
"description": "Which producer to execute this task",
"type": "object",
"additionalProperties": false,
"properties": {
"globalId": {
"$ref": "#/definitions/globalId"
},
"type": {
"$ref": "#/definitions/producerType"
},
"retryTimes": {
"type": "integer",
"description": "How many times client side retried to re-execute this task"
},
"startedAt": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"description": "Producer start to execute this task"
}
]
},
"endedAt": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"title": "Producer end to execute this task"
}
]
}
}
},
"version": {
"$ref": "#/definitions/version"
},
"failuresNumber": {
"title": "Number of failures",
"type": "integer",
"default": 0,
"description": "How many times this task has failed to run.",
"minimum": 0
},
"failuresReason": {
"type": "string",
"description": "Failure reason collected by the producer"
}
}
},
"_id": {
"$ref": "#/definitions/_id"
},
"globalId": {
"$ref": "#/definitions/globalId"
},
"type": {
"title": "Task Type",
"type": "string",
"default": "CRAWLER",
"description": "Type of this task, this type will help producer to decide how to run it. For now, we only have one type **web crawler**",
"enum": ["CRAWLER"]
},
"name": {
"$ref": "#/definitions/name"
},
"description": {
"$ref": "#/definitions/description"
},
"retailer": {
"type": "object",
"description": "Which Retailer to send the **callback** to after the producer executes this task",
"required": ["globalId"],
"properties": {
"globalId": {
"$ref": "#/definitions/globalId"
},
"state": {
"$ref": "#/definitions/retailerState"
}
}
},
"permission": {
"$ref": "#/definitions/permission"
},
"priority": {
"type": "integer",
"minimum": 1,
"default": 100,
"description": "Priority of this task. Priorities are only compared among tasks of the same Retailer, never across Retailers. A larger value means a lower priority: priority value 1 is higher than priority value 2."
},
"suitableProducers": {
"type": "array",
"description": "What kind of producer types can execute this task",
"default": ["HEADLESSBROWSER"],
"items": {
"$ref": "#/definitions/producerType"
},
"minItems": 1,
"uniqueItems": true
},
"url": {
"$ref": "#/definitions/url"
},
"metadata": {
"type": "object",
"additionalProperties": true,
"description": "Additional metadata for this task",
"properties": {
"script": {
"type": "string",
"description": "Code to execute after **window.onload**. You only need to pass the function body. By default you get the following parameters - **resolve**, **reject**, **task**, **axios**. You need to call resolve(data) or reject(err); the data or err will then be sent to the Retailer. This only works with the **HEADLESSBROWSER** producer"
}
}
},
"dataset": {
"$ref": "#/definitions/dataset"
}
},
"definitions": {
"_id": {
"title": "ID",
"description": "Identifier of this entity",
"type": ["string", "object"]
},
"created": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"title": "Created at"
}
]
},
"modified": {
"allOf": [
{
"$ref": "#/definitions/timestamp"
},
{
"title": "Last time modified at"
}
]
},
"timestamp": {
"type": ["integer", "null"],
"description": "Timestamp of when this task was assigned to a producer",
"examples": [1553974629109]
},
"version": {
"title": "Version",
"description": "version of this producer",
"type": "string",
"pattern": "^\\d+\\.\\d+\\.\\d+$",
"default": "1.0.0",
"examples": ["1.0.0"]
},
"globalId": {
"title": "Global ID",
"description": "globally unique identifier, if you don't pass, then it will automatically generate one",
"type": "string",
"examples": ["59f43b55-46a3-4efc-a960-018bcca91f46"]
},
"producerType": {
"title": "Producer Type",
"type": "string",
"description": "Producer type of this producer",
"enum": ["HTTP", "HEADLESSBROWSER"]
},
"permission": {
"type": "string",
"description": "Controls who can run this task. Private means only a producer with a matching **securityKey** can run it; public means any matching producer can run it. If your metadata has a script, the task is forced to private.",
"default": "PRIVATE",
"enum": ["PRIVATE", "PUBLIC"]
},
"url": {
"type": "string",
"format": "uri",
"description": "Target URL for this task.",
"examples": ["https://json-schema.org"]
},
"name": {
"title": "Name",
"description": "Give a meaningful name",
"type": "string",
"minLength": 1,
"maxLength": 100,
"examples": ["My Chrome Extension"]
},
"description": {
"title": "Description",
"description": "description of this producer",
"type": "string",
"minLength": 1,
"maxLength": 200,
"examples": ["My chrome extension used for test purpose"]
},
"securityKey": {
"title": "Security Key",
"type": "string",
"description": "Security key is used to identify different user"
},
"state": {
"title": "State",
"type": "string",
"description": "State of this entity",
"enum": ["DRAFT", "CONFIGURED", "ACTIVE", "DELETED"]
},
"retailerState": {
"title": "Retailer State",
"type": "string",
"description": "State of this entity",
"enum": ["ACTIVE", "FAILED"]
},
"dataset": {
"type": "object",
"additionalProperties": true,
"description": "All data that was collected by producer when it executed task. You also can use **metadata.script** to add more properties",
"properties": {
"url": {
"type": "string",
"description": "The final URL from which this information was collected. Sometimes the original URL is redirected.",
"format": "uri",
"examples": ["https://json-schema.org"]
},
"data": {
"type": "object",
"description": "Collected data",
"properties": {
"contentType": {
"type": "string",
"title": "Content Type",
"description": "Content Type of return data",
"default": "html",
"enum": ["html", "json", "text"]
},
"content": {
"oneOf": [
{
"type": "string",
"description": "Collect html or text string"
},
{
"type": "object",
"description": "Collect JSON data"
}
]
}
}
}
}
}
}
}
{
"globalId": "a110bda5-abd1-4633-9dbb-bc1310d7662f",
"type": "CRAWLER",
"retailer": {
"globalId": "32e9671f-8f3c-45ac-8dfe-d5b5d9a0cbc1",
"state": "ACTIVE"
},
"permission": "PRIVATE",
"priority": 1,
"suitableProducers": [
"HEADLESSBROWSER"
],
"url": "http://exampleblog.bitsky.ai/",
"metadata": {
"type": "instagram",
"script": "async function customFunction() {\n await $$page.waitFor(5 * 1000);\n}"
},
"dataset": {
"url": "http://exampleblog.bitsky.ai/",
"data": {
"contentType": "html",
"content": "<head>\n <meta charset=\"utf-8\">\n <meta http-equiv=\"X-UA-Compatible\" content=\"IE=edge\">\n <!-- <meta name=\"google-site-verification\" content=\"xBT4GhYoi5qRD5tr338pgPM5OWHHIDR6mNg1a3euekI\" /> -->\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1, viewport-fit=cover\">\n <meta name=\"description\" content=\"This is a test blog for munew user to learn how to use munew product\">\n <meta name=\"keywords\" content=\"\">\n <meta name=\"theme-color\" content=\"#000000\">\n \n <!-- Open Graph -->\n <meta property=\"og:title\" content=\"\">\n \n <meta property=\"og:type\" content=\"website\">\n <meta property=\"og:description\" content=\"Make Life Productive(让生命富有成效)\">\n \n <meta property=\"og:image\" content=\"\">\n <meta property=\"og:url\" content=\"/\">\n <meta property=\"og:site_name\" content=\"\">\n \n <title></title>\n\n <!-- Favicon -->\n <link rel=\"shortcut icon\" href=\"/img/favicon.ico\">\n \n <!-- Canonical URL -->\n <link rel=\"canonical\" href=\"/\">\n\n <!-- Bootstrap Core CSS -->\n <link rel=\"stylesheet\" href=\"/css/bootstrap.min.css\">\n\n <!-- Custom CSS -->\n <link rel=\"stylesheet\" href=\"/css/hux-blog.min.css\">\n\n <!-- Custom Fonts -->\n <!-- <link href=\"http://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css\" rel=\"stylesheet\" type=\"text/css\"> -->\n <!-- Hux change font-awesome CDN to qiniu -->\n <link href=\"//cdnjs.cloudflare.com/ajax/libs/font-awesome/4.6.3/css/font-awesome.min.css\" rel=\"stylesheet\" type=\"text/css\">\n\n\n <!-- HTML5 Shim and Respond.js IE8 support of HTML5 elements and media queries -->\n <!-- WARNING: Respond.js doesn't work if you view the page via file:// -->\n <!--[if lt IE 9]>\n <script src=\"https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js\"></script>\n <script src=\"https://oss.maxcdn.com/libs/respond.js/1.4.2/respond.min.js\"></script>\n <![endif]-->\n\n <!-- ga & ba script hoook -->\n <script 
src=\"//cdnjs.cloudflare.com/ajax/libs/fastclick/1.0.6/fastclick.min.js\"></script><script></script>\n</head>\n\n\n<!-- hack iOS CSS :active style -->\n<body ontouchstart=\"\">\n\n <!-- Navigation -->\n\n<nav class=\"navbar navbar-default navbar-custom navbar-fixed-top\">\n\n <div class=\"container-fluid\">\n <!-- Brand and toggle get grouped for better mobile display -->\n <div class=\"navbar-header page-scroll\">\n <button type=\"button\" class=\"navbar-toggle\">\n <span class=\"sr-only\">Toggle navigation</span>\n <span class=\"icon-bar\"></span>\n <span class=\"icon-bar\"></span>\n <span class=\"icon-bar\"></span>\n </button>\n <a class=\"navbar-brand\" href=\"/\">Munew Example Blog</a>\n </div>\n\n <!-- Collect the nav links, forms, and other content for toggling -->\n <div id=\"huxblog_navbar\">\n <div class=\"navbar-collapse\">\n <ul class=\"nav navbar-nav navbar-right\">\n <li>\n <a href=\"/\">Home</a>\n </li>\n \n \n \n \n <li>\n <a href=\"/about/\">About</a>\n </li>\n \n \n \n <li>\n <a href=\"/archive/\">Archive</a>\n </li>\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n </ul>\n </div>\n </div>\n <!-- /.navbar-collapse -->\n </div>\n <!-- /.container -->\n</nav>\n<script>\n // Drop Bootstarp low-performance Navbar\n // Use customize navbar with high-quality material design animation\n // in high-perf jank-free CSS3 implementation\n var $body = document.body;\n var $toggle = document.querySelector('.navbar-toggle');\n var $navbar = document.querySelector('#huxblog_navbar');\n var $collapse = document.querySelector('.navbar-collapse');\n\n var __HuxNav__ = {\n close: function(){\n $navbar.className = \" \";\n // wait until animation end.\n setTimeout(function(){\n // prevent frequently toggle\n if($navbar.className.indexOf('in') < 0) {\n $collapse.style.height = \"0px\"\n }\n },400)\n },\n open: function(){\n $collapse.style.height = \"auto\"\n $navbar.className += \" in\";\n }\n }\n\n // Bind Event\n $toggle.addEventListener('click', function(e){\n if 
($navbar.className.indexOf('in') > 0) {\n __HuxNav__.close()\n }else{\n __HuxNav__.open()\n }\n })\n\n /**\n * Since Fastclick is used to delegate 'touchstart' globally\n * to hack 300ms delay in iOS by performing a fake 'click',\n * Using 'e.stopPropagation' to stop 'touchstart' event from \n * $toggle/$collapse will break global delegation.\n * \n * Instead, we use a 'e.target' filter to prevent handler\n * added to document close HuxNav. \n *\n * Also, we use 'click' instead of 'touchstart' as compromise\n */\n document.addEventListener('click', function(e){\n if(e.target == $toggle) return;\n if(e.target.className == 'icon-bar') return;\n __HuxNav__.close();\n })\n</script>\n\n\n <!-- Page Header -->\n\n\n\n\n\n\n\n<header class=\"intro-header\" style=\"background-image: url('/img/home-bg.jpg')\">\n <div class=\"container\">\n <div class=\"row\">\n <div class=\"col-lg-8 col-lg-offset-2 col-md-10 col-md-offset-1\">\n \n <div class=\"site-heading\">\n \n <h1>Munew Example Blog</h1>\n <span class=\"subheading\">Make Life Productive(让生命富有成效)</span>\n </div>\n </div>\n </div>\n </div>\n</header>\n\n\n<!-- Main Content -->\n<div class=\"container\">\n\t<div class=\"row\">\n \n\n<!-- USE SIDEBAR -->\n <!-- PostList Container -->\n \t\t<div class=\"\n col-lg-8 col-lg-offset-1\n col-md-8 col-md-offset-1\n col-sm-12\n col-xs-12\n postlist-container\n \">\n \t\t\t\n<div class=\"post-preview\">\n <a href=\"/1916/the-road-not-taken\">\n <h2 class=\"post-title\">\n The Road Not Taken\n </h2>\n \n <div class=\"post-content-preview\">\n \n \n\nTwo roads diverged in a yellow wood,\n\n\nAnd sorry I could not travel both\n\n\nAnd be one traveler, long I stood\n\n\nAnd looked down one as far as I could\n\n\nTo where it bent in the undergrowth;\n\n\n\n\n\nTh...\n \n </div>\n </a>\n <p class=\"post-meta\">\n Posted by <span class=\"author\">Robert Frost</span> on <span class=\"date\"> 1916</span>\n </p>\n</div>\n<hr>\n\n<div class=\"post-preview\">\n <a 
href=\"/1883/the-new-colossus\">\n <h2 class=\"post-title\">\n The New Colossus\n </h2>\n \n <div class=\"post-content-preview\">\n \n \n\nNot like the brazen giant of Greek fame,\n\n\nWith conquering limbs astride from land to land;\n\n\nHere at our sea-washed, sunset gates shall stand\n\n\nA mighty woman with a torch, whose flame\n\n\nIs the ...\n \n </div>\n </a>\n <p class=\"post-meta\">\n Posted by <span class=\"author\">Emma Lazarus</span> on <span class=\"date\"> 1883</span>\n </p>\n</div>\n<hr>\n\n<div class=\"post-preview\">\n <a href=\"/1838/a-psalm-of-life\">\n <h2 class=\"post-title\">\n A Psalm of Life\n </h2>\n \n <div class=\"post-content-preview\">\n \n \n \n Tell me not, in mournful numbers,\n Life is but an empty dream!\n For the soul is dead that slumbers,\n And things are not what they seem.\n \n \n Life is real! Life is earnest!\n ...\n \n </div>\n </a>\n <p class=\"post-meta\">\n Posted by <span class=\"author\">Henry Wadsworth Longfellow</span> on <span class=\"date\"> 1838</span>\n </p>\n</div>\n<hr>\n\n\n<!-- Pager -->\n\n<ul class=\"pager\">\n \n \n <li class=\"next\">\n <a href=\"/page2\">Older Posts →</a>\n </li>\n \n</ul>\n\n\n \t\t</div>\n <!-- Sidebar Container -->\n <div class=\"\n col-lg-3 col-lg-offset-0\n col-md-3 col-md-offset-0\n col-sm-12\n col-xs-12\n sidebar-container\n \">\n <!-- Featured Tags -->\n \n\n\n<section>\n \n <h5><a href=\"/archive/\">FEATURED TAGS</a></h5>\n <div class=\"tags\">\n \n \n \n \n \n \n <a data-sort=\"0000\" href=\"/archive/?tag=poem\" title=\"poem\" rel=\"10\">poem\n </a></div><a data-sort=\"0000\" href=\"/archive/?tag=poem\" title=\"poem\" rel=\"10\">\n</a></section><a data-sort=\"0000\" href=\"/archive/?tag=poem\" title=\"poem\" rel=\"10\">\n\n\n <!-- Short About -->\n </a><section class=\"visible-md visible-lg\"><a data-sort=\"0000\" href=\"/archive/?tag=poem\" title=\"poem\" rel=\"10\">\n <hr>\n </a><h5><a data-sort=\"0000\" href=\"/archive/?tag=poem\" title=\"poem\" rel=\"10\"></a><a 
href=\"/about/\">ABOUT ME</a></h5>\n <div class=\"short-about\">\n \n \n <!-- SNS Link -->\n \n\n\n<ul class=\"list-inline\">\n\n\n \n \n <li>\n <a href=\"https://twitter.com/munewio\">\n <span class=\"fa-stack fa-lg\">\n <i class=\"fa fa-circle fa-stack-2x\"></i>\n <i class=\"fa fa-twitter fa-stack-1x fa-inverse\"></i>\n </span>\n </a>\n </li>\n \n \n \n \n \n <li>\n <a target=\"_blank\" href=\"https://github.com/munew\">\n <span class=\"fa-stack fa-lg\">\n <i class=\"fa fa-circle fa-stack-2x\"></i>\n <i class=\"fa fa-github fa-stack-1x fa-inverse\"></i>\n </span>\n </a>\n </li>\n \n \n</ul>\n </div>\n</section>\n\n <!-- Friends Blog -->\n \n \t\t</div>\n \n\t</div>\n</div>\n\n\n\n <!-- Footer -->\n<footer>\n <div class=\"container\">\n <div class=\"row\">\n <div class=\"col-lg-8 col-lg-offset-2 col-md-10 col-md-offset-1\">\n <!-- SNS Link -->\n \n\n\n<ul class=\"list-inline text-center\">\n\n\n \n \n <li>\n <a href=\"https://twitter.com/munewio\">\n <span class=\"fa-stack fa-lg\">\n <i class=\"fa fa-circle fa-stack-2x\"></i>\n <i class=\"fa fa-twitter fa-stack-1x fa-inverse\"></i>\n </span>\n </a>\n </li>\n \n \n \n \n \n <li>\n <a target=\"_blank\" href=\"https://github.com/munew\">\n <span class=\"fa-stack fa-lg\">\n <i class=\"fa fa-circle fa-stack-2x\"></i>\n <i class=\"fa fa-github fa-stack-1x fa-inverse\"></i>\n </span>\n </a>\n </li>\n \n \n</ul>\n\n <p class=\"copyright text-muted\">\n Copyright © Munew Example Blog 2019\n <br>\n Powered by <a href=\"http://huangxuan.me\">Hux Blog</a> |\n <iframe style=\"margin-left: 2px; margin-bottom:-5px;\" frameborder=\"0\" scrolling=\"0\" width=\"100px\" height=\"20px\" src=\"https://ghbtns.com/github-btn.html?user=huxpro&repo=huxpro.github.io&type=star&count=true\">\n </iframe>\n </p>\n </div>\n </div>\n </div>\n</footer>\n\n<!-- jQuery -->\n<script src=\"/js/jquery.min.js \"></script>\n\n<!-- Bootstrap Core JavaScript -->\n<!-- Currently, only navbar scroll-down effect at desktop still depends on this -->\n<script 
src=\"/js/bootstrap.min.js \"></script>\n\n<!-- Custom Theme JavaScript -->\n<script src=\"/js/hux-blog.min.js \"></script>\n\n<!-- Service Worker -->\n\n<script src=\"/js/snackbar.js \"></script>\n<script src=\"/js/sw-registration.js \"></script>\n\n\n<!-- async load function -->\n<script>\n function async(u, c) {\n var d = document, t = 'script',\n o = d.createElement(t),\n s = d.getElementsByTagName(t)[0];\n o.src = u;\n if (c) { o.addEventListener('load', function (e) { c(null, e); }, false); }\n s.parentNode.insertBefore(o, s);\n }\n</script>\n\n<!--\n Because of the native support for backtick-style fenced code blocks\n right within the Markdown is landed in Github Pages,\n From V1.6, There is no need for Highlight.js,\n so Huxblog drops it officially.\n\n - https://github.com/blog/2100-github-pages-now-faster-and-simpler-with-jekyll-3-0\n - https://help.github.com/articles/creating-and-highlighting-code-blocks/\n - https://github.com/jneen/rouge/wiki/list-of-supported-languages-and-lexers\n-->\n<!--\n <script>\n async(\"http://cdn.bootcss.com/highlight.js/8.6/highlight.min.js\", function(){\n hljs.initHighlightingOnLoad();\n })\n </script>\n <link href=\"http://cdn.bootcss.com/highlight.js/8.6/styles/github.min.css\" rel=\"stylesheet\">\n-->\n\n\n\n\n\n<!--fastClick.js -->\n<script>\n async(\"//cdnjs.cloudflare.com/ajax/libs/fastclick/1.0.6/fastclick.min.js\", function(){\n var $nav = document.querySelector(\"nav\");\n if($nav) FastClick.attach($nav);\n })\n</script>\n\n\n<!-- Google Analytics -->\n\n\n\n<!-- Baidu Tongji -->\n\n\n\n<!-- Side Catalog -->\n\n\n\n<!-- Multi-Lingual -->\n\n\n\n\n<!-- Image to hack wechat -->\n<!-- <img src=\"/img/icon_wechat.png\" width=\"0\" height=\"0\" /> -->\n<!-- Migrate from head to bottom, no longer block render and still work -->\n\n\n\n\n</body>"
}
},
"system": {
"state": "FINISHED",
"created": 1603125800007,
"modified": 1603125817378,
"startedAt": 1603125809711,
"endedAt": 1603125817378,
"version": "1.0.0",
"producer": {
"globalId": "a9d25b37-073e-48e1-a5ac-5f9310f66542",
"type": "HEADLESSBROWSER",
"endedAt": 1603125817351
}
}
}
get
http://localhost:8081
/health
Health check
get
http://localhost:8081
/apis/tasks/trigger
Initial Tasks(optional)
Last modified 2yr ago