Skip to content

Commit

Permalink
Merge pull request #818 from crawlab-team/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
tikazyq authored Jul 31, 2020
2 parents aabedb1 + 4991141 commit dcb9351
Show file tree
Hide file tree
Showing 41 changed files with 869 additions and 205 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG-zh.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# 0.5.1 (2020-07-31)
### 功能 / 优化
- **加入错误详情信息**.
- **加入 Golang 编程语言支持**.
- **加入 Chrome Driver 和 Firefox Web Driver 安装脚本**.
- **支持系统任务**. "系统任务"跟普通爬虫任务相似，允许用户查看诸如安装语言之类的任务日志.
- **将安装语言从 RPC 更改为系统任务**.

### Bug 修复
- **修复在爬虫市场中第一次下载爬虫时会报500错误**. [#808](https://github.com/crawlab-team/crawlab/issues/808)
- **修复一部分翻译问题**.
- **修复任务详情 500 错误**. [#810](https://github.com/crawlab-team/crawlab/issues/810)
- **修复密码重置问题**. [#811](https://github.com/crawlab-team/crawlab/issues/811)
- **修复无法下载 CSV 问题**. [#812](https://github.com/crawlab-team/crawlab/issues/812)
- **修复无法安装 Node.js 问题**. [#813](https://github.com/crawlab-team/crawlab/issues/813)
- **修复批量添加定时任务时默认为禁用问题**. [#814](https://github.com/crawlab-team/crawlab/issues/814)

# 0.5.0 (2020-07-19)
### 功能 / 优化
- **爬虫市场**. 允许用户下载开源爬虫到 Crawlab.
Expand Down
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# 0.5.1 (2020-07-31)
### Features / Enhancement
- **Added error message details**.
- **Added Golang programming language support**.
- **Added web driver installation scripts for Chrome Driver and Firefox**.
- **Support system tasks**. A "system task" is similar to a normal spider task; it allows users to view logs of general tasks such as installing languages.
- **Changed methods of installing languages from RPC to system tasks**.

### Bug Fixes
- **Fixed 500 error on first repo download in Spider Market page**. [#808](https://github.com/crawlab-team/crawlab/issues/808)
- **Fixed some translation issues**.
- **Fixed 500 error in task detail page**. [#810](https://github.com/crawlab-team/crawlab/issues/810)
- **Fixed password reset issue**. [#811](https://github.com/crawlab-team/crawlab/issues/811)
- **Fixed issue of being unable to download CSV**. [#812](https://github.com/crawlab-team/crawlab/issues/812)
- **Fixed issue of being unable to install Node.js**. [#813](https://github.com/crawlab-team/crawlab/issues/813)
- **Fixed disabled status for batch adding schedules**. [#814](https://github.com/crawlab-team/crawlab/issues/814)

# 0.5.0 (2020-07-19)
### Features / Enhancement
- **Spider Market**. Allow users to download open-source spiders into Crawlab.
Expand Down
3 changes: 2 additions & 1 deletion backend/conf/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ server:
java: "N"
dotnet: "N"
php: "N"
scripts: "/app/backend/scripts"
spider:
path: "/app/spiders"
task:
workers: 16
other:
tmppath: "/tmp"
version: 0.5.0
version: 0.5.1
setting:
crawlabLogToES: "N" # Send crawlab runtime log to ES, open this option "Y", remember to set esClient
crawlabLogIndex: "crawlab-log"
Expand Down
5 changes: 5 additions & 0 deletions backend/constants/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,8 @@ const (
InstallStatusInstallingOther = "installing-other"
InstallStatusInstalled = "installed"
)

// Kinds of installable items.
// NOTE(review): presumably the values stored in the Type field of entity.Lang — confirm against callers.
const (
// LangTypeLang has the value "lang".
LangTypeLang = "lang"
// LangTypeWebDriver has the value "webdriver".
LangTypeWebDriver = "webdriver"
)
5 changes: 5 additions & 0 deletions backend/constants/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ const (
RunTypeRandom string = "random"
RunTypeSelectedNodes string = "selected-nodes"
)

// Task types, assigned to the Type field of a task when it is created.
const (
// TaskTypeSpider is the type set on tasks created to run a spider.
TaskTypeSpider string = "spider"
// TaskTypeSystem is the type set on tasks created to run a system script.
TaskTypeSystem string = "system"
)
1 change: 1 addition & 0 deletions backend/entity/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type Lang struct {
InstallStatus string `json:"install_status"`
DepFileName string `json:"dep_file_name"`
InstallDepArgs string `json:"install_dep_cmd"`
Type string `json:"type"`
}

type Dependency struct {
Expand Down
20 changes: 13 additions & 7 deletions backend/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,11 @@ func main() {
authGroup.POST("/tasks-cancel", routes.CancelSelectedTask) // 批量取消任务
authGroup.POST("/tasks-restart", routes.RestartSelectedTask) // 批量重试任务
}
// 系统任务/脚本
{
authGroup.PUT("/system-tasks", routes.PutSystemTask) // 运行系统任务
authGroup.GET("/system-scripts", routes.GetSystemScripts) // 获取系统脚本列表
}
// 定时任务
{
authGroup.GET("/schedules", routes.GetScheduleList) // 定时任务列表
Expand All @@ -269,13 +274,14 @@ func main() {
}
// 用户
{
authGroup.GET("/users", routes.GetUserList) // 用户列表
authGroup.GET("/users/:id", routes.GetUser) // 用户详情
authGroup.POST("/users/:id", routes.PostUser) // 更改用户
authGroup.DELETE("/users/:id", routes.DeleteUser) // 删除用户
authGroup.PUT("/users-add", routes.PutUser) // 添加用户
authGroup.GET("/me", routes.GetMe) // 获取自己账户
authGroup.POST("/me", routes.PostMe) // 修改自己账户
authGroup.GET("/users", routes.GetUserList) // 用户列表
authGroup.GET("/users/:id", routes.GetUser) // 用户详情
authGroup.POST("/users/:id", routes.PostUser) // 更改用户
authGroup.DELETE("/users/:id", routes.DeleteUser) // 删除用户
authGroup.PUT("/users-add", routes.PutUser) // 添加用户
authGroup.GET("/me", routes.GetMe) // 获取自己账户
authGroup.POST("/me", routes.PostMe) // 修改自己账户
authGroup.POST("/me/change-password", routes.PostMeChangePassword) // 修改自己密码
}
// 系统
{
Expand Down
5 changes: 4 additions & 1 deletion backend/model/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,10 @@ func GetScheduleList(filter interface{}) ([]Schedule, error) {
if schedule.RunType == constants.RunTypeSelectedNodes {
for _, nodeId := range schedule.NodeIds {
// 选择单一节点
node, _ := GetNode(nodeId)
node, err := GetNode(nodeId)
if err != nil {
continue
}
schedule.Nodes = append(schedule.Nodes, node)
}
}
Expand Down
18 changes: 18 additions & 0 deletions backend/model/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crawlab/database"
"crawlab/utils"
"github.com/apex/log"
"github.com/globalsign/mgo"
"github.com/globalsign/mgo/bson"
"runtime/debug"
"time"
Expand All @@ -29,6 +30,7 @@ type Task struct {
Pid int `json:"pid" bson:"pid"`
RunType string `json:"run_type" bson:"run_type"`
ScheduleId bson.ObjectId `json:"schedule_id" bson:"schedule_id"`
Type string `json:"type" bson:"type"`

// 前端数据
SpiderName string `json:"spider_name"`
Expand Down Expand Up @@ -514,3 +516,19 @@ func UpdateTaskErrorLogs(taskId string, errorRegexPattern string) error {

return nil
}

// GetTaskByFilter returns the first document in the "tasks" collection that
// matches filter.
//
// If the query fails, the error is returned to the caller. mgo.ErrNotFound is
// returned silently; every other error is logged together with a stack trace
// before being returned.
func GetTaskByFilter(filter bson.M) (t Task, err error) {
	s, c := database.GetCol("tasks")
	defer s.Close()

	if err = c.Find(filter).One(&t); err != nil {
		// Only unexpected failures are logged; a not-found result is left
		// for the caller to interpret.
		if err != mgo.ErrNotFound {
			// The error text is passed as an argument, not spliced into the
			// format string, so a '%' in the message cannot be misread as a
			// formatting verb.
			log.Errorf("find task by filter error: %s", err.Error())
			debug.PrintStack()
		}
		return t, err
	}

	return t, nil
}
3 changes: 3 additions & 0 deletions backend/routes/schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,9 @@ func PutBatchSchedules(c *gin.Context) {
// 添加 UserID
s.UserId = services.GetCurrentUserId(c)

// 默认启用
s.Enabled = true

// 添加定时任务
if err := model.AddSchedule(s); err != nil {
log.Errorf("add schedule error: " + err.Error())
Expand Down
3 changes: 3 additions & 0 deletions backend/routes/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,7 @@ func RunSelectedSpider(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeAllNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand All @@ -830,6 +831,7 @@ func RunSelectedSpider(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeRandom,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}
id, err := services.AddTask(t)
if err != nil {
Expand All @@ -847,6 +849,7 @@ func RunSelectedSpider(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeSelectedNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand Down
118 changes: 118 additions & 0 deletions backend/routes/system_tasks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package routes

import (
"crawlab/constants"
"crawlab/model"
"crawlab/services"
"crawlab/utils"
"fmt"
"github.com/gin-gonic/gin"
"github.com/globalsign/mgo/bson"
"net/http"
)

// GetSystemScripts responds with the list of system scripts reported by
// utils.GetSystemScripts.
func GetSystemScripts(c *gin.Context) {
	scripts := utils.GetSystemScripts()
	HandleSuccessData(c, scripts)
}

// PutSystemTask creates one or more system tasks that run a system script.
//
// The JSON request body carries:
//   - run_type: constants.RunTypeAllNodes, constants.RunTypeRandom or
//     constants.RunTypeSelectedNodes;
//   - node_ids: target nodes, read only for the selected-nodes run type;
//   - script: script name, which must be listed by utils.GetSystemScripts.
//
// On success the IDs of the created tasks are sent back. A malformed body, an
// empty or unknown script, or an unknown run_type is answered with 400; a
// failure while listing nodes or adding a task is answered with 500. Tasks
// added before a failure are not rolled back.
func PutSystemTask(c *gin.Context) {
	type TaskRequestBody struct {
		RunType string          `json:"run_type"`
		NodeIds []bson.ObjectId `json:"node_ids"`
		Script  string          `json:"script"`
	}

	// Bind the request body.
	var reqBody TaskRequestBody
	if err := c.ShouldBindJSON(&reqBody); err != nil {
		HandleError(http.StatusBadRequest, c, err)
		return
	}

	// The script name is required.
	if reqBody.Script == "" {
		HandleErrorF(http.StatusBadRequest, c, "script cannot be empty")
		return
	}

	// The script must be one of the known system scripts.
	if !utils.StringArrayContains(utils.GetSystemScripts(), reqBody.Script) {
		HandleErrorF(http.StatusBadRequest, c, "script does not exist")
		return
	}

	// TODO: check whether the script is already running

	// Command executed by the task.
	cmd := fmt.Sprintf("sh %s", utils.GetSystemScriptPath(reqBody.Script))

	// newTask builds a system task for the given run type. NodeId is left
	// unset; callers fill it in when the task is bound to a specific node.
	newTask := func(runType string) model.Task {
		return model.Task{
			SpiderId:   bson.ObjectIdHex(constants.ObjectIdNull),
			UserId:     services.GetCurrentUserId(c),
			RunType:    runType,
			ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
			Type:       constants.TaskTypeSystem,
			Cmd:        cmd,
		}
	}

	// IDs of the tasks created by this request.
	var taskIds []string

	switch reqBody.RunType {
	case constants.RunTypeAllNodes:
		// One task per node.
		nodes, err := model.GetNodeList(nil)
		if err != nil {
			HandleError(http.StatusInternalServerError, c, err)
			return
		}
		for _, node := range nodes {
			t := newTask(constants.RunTypeAllNodes)
			t.NodeId = node.Id
			id, err := services.AddTask(t)
			if err != nil {
				HandleError(http.StatusInternalServerError, c, err)
				return
			}
			taskIds = append(taskIds, id)
		}
	case constants.RunTypeRandom:
		// A single task with no node assigned here.
		id, err := services.AddTask(newTask(constants.RunTypeRandom))
		if err != nil {
			HandleError(http.StatusInternalServerError, c, err)
			return
		}
		taskIds = append(taskIds, id)
	case constants.RunTypeSelectedNodes:
		// One task per requested node.
		for _, nodeId := range reqBody.NodeIds {
			t := newTask(constants.RunTypeSelectedNodes)
			t.NodeId = nodeId
			id, err := services.AddTask(t)
			if err != nil {
				HandleError(http.StatusInternalServerError, c, err)
				return
			}
			taskIds = append(taskIds, id)
		}
	default:
		// An unknown run_type is a problem with the request, so it is
		// reported as 400 like the other validation failures above.
		HandleErrorF(http.StatusBadRequest, c, "invalid run_type")
		return
	}

	HandleSuccessData(c, taskIds)
}
10 changes: 10 additions & 0 deletions backend/routes/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type TaskListRequestData struct {
SpiderId string `form:"spider_id"`
ScheduleId string `form:"schedule_id"`
Status string `form:"status"`
Type string `form:"type"`
}

type TaskResultsRequestData struct {
Expand Down Expand Up @@ -64,6 +65,9 @@ func GetTaskList(c *gin.Context) {
if data.ScheduleId != "" {
query["schedule_id"] = bson.ObjectIdHex(data.ScheduleId)
}
if data.Type != "" {
query["type"] = data.Type
}

// 获取校验
query = services.GetAuthQuery(query, c)
Expand Down Expand Up @@ -150,6 +154,7 @@ func PutTask(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeAllNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand All @@ -168,6 +173,7 @@ func PutTask(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeRandom,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}
id, err := services.AddTask(t)
if err != nil {
Expand All @@ -185,6 +191,7 @@ func PutTask(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeSelectedNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand Down Expand Up @@ -225,6 +232,7 @@ func PutBatchTasks(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeAllNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand All @@ -242,6 +250,7 @@ func PutBatchTasks(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeRandom,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}
id, err := services.AddTask(t)
if err != nil {
Expand All @@ -259,6 +268,7 @@ func PutBatchTasks(c *gin.Context) {
UserId: services.GetCurrentUserId(c),
RunType: constants.RunTypeSelectedNodes,
ScheduleId: bson.ObjectIdHex(constants.ObjectIdNull),
Type: constants.TaskTypeSpider,
}

id, err := services.AddTask(t)
Expand Down
Loading

0 comments on commit dcb9351

Please sign in to comment.