# topicSpider

**Repository Path**: findluzi/topicSpider

## Basic Information

- **Project Name**: topicSpider
- **Description**: No description available
- **Primary Language**: Unknown
- **License**: Not specified
- **Default Branch**: main
- **Homepage**: None
- **GVP Project**: No

## Statistics

- **Stars**: 0
- **Forks**: 0
- **Created**: 2021-01-11
- **Last Updated**: 2022-08-05

## Categories & Tags

**Categories**: Uncategorized

**Tags**: None

## README

# topicSpider

## Database schema

```sql
create schema topicSpider collate utf8mb4_0900_ai_ci;

-- Make the new schema the default database. The table and index statements
-- below are unqualified, while the INSERT statements further down address
-- topicSpider.system_menu explicitly; without this line the tables would be
-- created in whatever database the session happened to have selected.
use topicSpider;

-- Field rules extracted from crawler pages (English rendering of the table comment).
create table crawler_field_rule
(
    id             int auto_increment primary key,
    field_name     varchar(100)  not null,
    parse_type     int           not null,
    rule           varchar(4000) not null,
    page_rule_id   int           not null,
    process_config longtext      null,
    created_time   datetime      not null,
    modified_time  datetime      not null
)
    comment '爬虫页面抽取的字段规则';

-- Crawler page rules (English rendering of the table comment).
-- NOTE(review): task_id is a signed int while crawler_task.id is int unsigned,
-- and no foreign key is declared; presumably task_id refers to crawler_task.id.
-- Confirm, and align the types if a foreign key is ever added.
create table crawler_page_rule
(
    id             int auto_increment primary key,
    level          int           not null,
    url_rule       varchar(4000) not null,
    task_id        int           not null,
    rule_name      varchar(4000) null,
    page_turn_rule varchar(4000) null,
    parent_rule_id int           not null,
    page_type      int           not null,
    created_time   datetime      not null,
    modified_time  datetime      not null
)
    comment '爬虫页面规则';

-- Crawler tasks.
-- NOTE(review): charset=utf8 (an alias of utf8mb3 in MySQL) differs from the
-- schema-level utf8mb4 collation declared above; confirm this is intentional.
create table crawler_task
(
    id            int unsigned auto_increment primary key,
    task_name     varchar(100)  null,
    priority      int unsigned  null,
    comment       varchar(4000) null,
    status        int           null,
    created_time  datetime      not null,
    modified_time datetime      not null
)
    charset=utf8;

-- System menu table (English rendering of the table comment).
-- Column comments, in order: ID, parent ID, name, menu icon, link,
-- link open mode, menu sort order, status (0: disabled, 1: enabled),
-- remarks, creation time, update time.
-- NOTE(review): same charset=utf8 vs. utf8mb4 question as crawler_task.
create table system_menu
(
    id            int unsigned auto_increment comment 'ID' primary key,
    pid           int unsigned     default '0'     not null comment '父ID',
    title         varchar(100)     default ''      not null comment '名称',
    icon          varchar(100)     default ''      not null comment '菜单图标',
    href          varchar(100)     default ''      not null comment '链接',
    target        varchar(20)      default '_self' not null comment '链接打开方式',
    sort          int              default 0       null comment '菜单排序',
    status        tinyint unsigned default '1'     not null comment '状态(0:禁用,1:启用)',
    remark        varchar(255)                     null comment '备注信息',
    created_time  datetime                         null comment '创建时间',
    modified_time datetime                         null comment '更新时间'
)
    comment '系统菜单表' charset=utf8;

create index href
    on system_menu (href);

create index title
    on system_menu (title);
```

## Example task definition (JSON)

```json
{
  "taskName": "T-1001",
  "comment": "第一个爬虫任务",
  "priority": 0,
  "seedPageRule": {
    "ruleName": "种子页面识别",
    "pageType": 1,
    "urlRule": {
      "ruleType": 0,
      "paramType": 1,
      "urlPattern": "http://www.mybu.net/link.asp"
    }
  },
  "pageRules": [
    {
      "ruleName": "第一层页面识别",
      "pageType": 1,
      "urlRule": {
        "ruleType": 1,
        "patternType": 1,
        "fillHead": "http://www.mybu.net/",
        "urlPattern": "/html/body/div/center/table/tbody/tr/td/table/tbody/tr/td/div/center/table/tbody/tr/td/a/@href"
      },
      "fieldRules": [
        {
          "fieldName": "urls",
          "parseType": 1,
          "rule": "/html/body/div[1]/center[1]/table[2]/tbody[1]/tr[1]/td[2]/table[1]/tbody[1]/tr[1]/td[1]/div[1]/center[1]/table[1]/tbody[1]/tr[4]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/table[1]/tbody[1]/tr[1]/td[1]/p[1]/a[1]/@href"
        }
      ]
    }
  ]
}
```

## Menu data

CSV rows with eleven fields each, matching the `system_menu` column order. Their ids (1 and 2) are also used by the `INSERT` statements below with different values, so this looks like an older export; confirm before loading both.

```text
1,0,爬虫模板,fa-street-view,/task,_self,0,1,"",2021-12-26 22:14:43,2021-12-26 22:14:43
2,0,我的发布,fa-send-o,/publish,_self,0,1,"",2021-12-26 22:14:43,2021-12-26 22:14:43
```

```sql
-- Top-level menu entries (pid = 0).
INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (1, 0, '工作台', 'fa fa-street-view', '/workstation', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');

INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (2, 0, '工具', 'fa fa-send-o', '/tools', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');

INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (3, 0, '文档', 'fa fa-send-o', '/docs', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');

-- NOTE(review): this row reuses href '/docs' from row 3; possibly a
-- copy-paste slip. Confirm the intended route for this entry.
INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (4, 0, '讨论区', 'fa fa-send-o', '/docs', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');

-- Child entries of menu 1 (pid = 1).
INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (5, 1, '我的模板', 'fa fa-adn', '/task/showPageRulesSetting', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');

INSERT INTO topicSpider.system_menu (id, pid, title, icon, href, target, sort, status, remark, created_time, modified_time)
VALUES (6, 1, '版本发布', 'fa fa-snowflake-o', '/publish', '_self', 0, 1, '', '2021-12-26 22:14:43', '2021-12-26 22:14:43');
```