这是 9 月份写的一个脚本。当时有个中间商找我写脚本,爬取“我的打工网”(wodedagong.com)的内容。我简单看了一下,里面的内容都是通过接口获取的,唯一的难点是请求里需要一个签名(Sign)字段做验证。说实话之前没搞过签名,简单查了一下资料才大概了解怎么做;因为不熟悉 JS,花了几个小时才搞定,也算是不负所托吧。但结果很戏剧性:内容拿下来了,那个找我的大佬却说他把需求搞错了,我……
原需求其实是要爬简历???你没理解错,就是注册用户投递的简历……这我真搞不定。
好吧,最后原客户提出给 100 块辛苦费,中间商抽了 20,我到手 80……我没说什么,反正以后找我做东西就先给钱吧。因为以前我一直怕接了单子结果搞不定会尴尬;但从那以后,就算搞不定要退钱,我也要先收费。今天突然看到这个脚本,也不知道还能不能用,反正就放出来了。当时的操作详情我没有留,因为本来没打算发出来。
有不懂的可以留言,重在交流!!!
# Crawler for job postings on wodedagong.com.
#
# Flow: query the signed "GetRecruitList" endpoint page by page, fetch the
# detail of every non-advert posting through "GetRecruitDetail", and append
# the interesting fields to the first paragraph of ``test.docx``.
import hashlib
import json
import os
import time

import requests
from docx import Document
from docx.enum.text import WD_BREAK

# Credentials the web client sends/signs with (hard-coded in the original script).
APP_KEY = "WKWeb"
APP_SECRET = "a323f9b6-1f04-420e-adb9-b06d142c5e63"

API_BASE = "https://www.wodedagong.com/ls_api"
DETAIL_URL = API_BASE + "/LS_RecruitServicesManager/GetRecruitDetail"
LIST_URL = API_BASE + "/LS_RecruitDataManager/GetRecruitList"

PAGE_SIZE = 10
SEARCH_NAME = "服务员"
OUTPUT_PATH = "test.docx"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever

HEADERS = {
    "Content-Type": "application/json; charset=UTF-8",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
}

# The "Data" field is sent as a pre-serialised JSON string and the signature is
# computed over that exact string, so the templates are kept as literal text.
DETAIL_TEMPLATE = (
    '{"RecruitId":%s,"Lng":"114.52082821245915","Lat":"38.048684212036896",'
    '"Address":"","UserSign":null}'
)
LIST_TEMPLATE = (
    '{"RecordIndex":%s,"RecordSize":%s,"Lng":114.52082821245915,'
    '"Lat":38.048684212036896,"VirtualLng":120.6174,"VirtualLat":31.335106,'
    '"SearchName":%s,"SortType":1,"SalaryRoundId":0,"IndustryId":0,'
    '"ProfessionalIds":[],"AreaId":0,"Pneumonia":0}'
)

MAP_URL_PREFIX = (
    "https://mapapi.qq.com/web/mapComponents/locationMarker/v/index.html"
    "?marker=coord%3A"
)
MAP_URL_SUFFIX = (
    "%3B%27&key=TKUBZ-D24AF-GJ4JY-JDVM2-IBYKK-KEBCU"
    "&referer=tengxun&ch=uri-api&ADTAG=uri-api.other"
)

# (label written to the document, key inside the detail response's "Data").
# "ContanctsPhone" is spelled that way by the API.
_FIELDS = (
    ("企业名称:", "EnterpriseName"),
    ("薪资范围:", "WagesView"),
    ("联系电话:", "ContanctsPhone"),
    ("发布标签:", "TagNames"),
    ("来源用户:", "FromUser"),
    ("企业地址:", "WorkAddress"),
    ("招聘岗位:", "WorkPost"),
    ("基本要求:", "WorkRequire"),
    ("工作环境:", "ImageList"),
    ("刷新时间:", "RefreshTime"),
    ("位置经度:", "Longitude"),
    ("位置纬度:", "Latitude"),
)


def sign(d, t):
    """Return the request signature for payload string *d* at timestamp *t*.

    The signature is the hex MD5 digest of ``APP_KEY + str(t) + d + APP_SECRET``
    encoded as UTF-8.
    """
    raw = str(APP_KEY) + str(t) + d + str(APP_SECRET)
    return hashlib.md5(raw.encode(encoding='utf-8')).hexdigest()


def _timestamp():
    """Return the current Unix time in whole seconds.

    Rounds to milliseconds first and then to seconds, exactly as the original
    script did (presumably mirroring the site's JavaScript client).
    """
    return round(int(round(time.time() * 1000)) / 1e3)


def _post(url, d):
    """POST the signed envelope carrying payload string *d* and return the decoded JSON.

    Raises ``requests.HTTPError`` on a non-2xx status and ``ValueError`` when
    the body is not valid JSON.
    """
    t = _timestamp()
    envelope = {
        "AppVer": "1.0.0",
        "TimeStamp": t,
        "Lang": "CN",
        "DeviceName": "web",
        "DeviceType": "web",
        "Token": "",
        "Uid": 0,
        "AppKey": APP_KEY,
        "Sign": sign(d, t),
        "Data": d,
    }
    res = requests.post(
        url, data=json.dumps(envelope), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    res.raise_for_status()
    return res.json()


def _list_query(index):
    """Build the ``Data`` string for one page of search results starting at *index*."""
    # json.dumps quotes/escapes the search term; ensure_ascii=False keeps the
    # text itself unchanged so the signed string matches what is sent.
    search = json.dumps(SEARCH_NAME, ensure_ascii=False)
    return LIST_TEMPLATE % (index, PAGE_SIZE, search)


def _text(value):
    """Return *value* as text, mapping ``None`` to an empty string."""
    return "" if value is None else str(value)


def _open_document(path):
    """Open (or create) the output document and return ``(document, first_paragraph)``."""
    document = Document(path) if os.path.exists(path) else Document()
    paragraphs = document.paragraphs
    paragraph = paragraphs[0] if paragraphs else document.add_paragraph()
    return document, paragraph


def _append_posting(paragraph, info):
    """Append one posting's fields and its map link to *paragraph*."""
    # Start the new entry on a fresh line when the paragraph already has content.
    if paragraph.runs:
        paragraph.runs[-1].add_break(WD_BREAK.LINE)
    for label, key in _FIELDS:
        paragraph.add_run(label + _text(info.get(key)) + "\n")
    map_url = (
        MAP_URL_PREFIX + _text(info.get('Latitude'))
        + "%2C" + _text(info.get('Longitude'))
        + "%3Btitle%3A" + _text(info.get('EnterpriseName'))
        + MAP_URL_SUFFIX
    )
    paragraph.add_run("进入地图:" + map_url + "\n\n\n\n")


def detail(id):
    """Fetch and return the decoded detail response for recruit record *id*."""
    return _post(DETAIL_URL, DETAIL_TEMPLATE % (id,))


# NOTE: the names ``list`` and ``id`` shadow builtins; they are kept so that
# existing callers keep working. Do not call the builtin ``list`` in this module.
def list(id):
    """Crawl one result page starting at record index *id* into ``test.docx``.

    Entries whose ``Type`` is 1 or whose ``RecordId`` is 1 are treated as
    adverts and skipped. The document is saved after every written posting so
    that progress survives a failure on a later record.
    """
    data = _post(LIST_URL, _list_query(id))
    records = (data.get('Data') or {}).get('RecordList') or []
    if not records:
        return

    document, paragraph = _open_document(OUTPUT_PATH)
    for record in records:
        # The original used ``==1 | ... == 1``; ``|`` binds tighter than ``==``,
        # which turned the test into a chained comparison that almost never
        # matched. A plain ``or`` is what the advert filter needs.
        if record.get('Type') == 1 or record.get('RecordId') == 1:
            print("过滤广告")
            continue
        info = detail(record['RecordId']).get('Data')
        if not info:
            print("获取详情失败,跳过:" + str(record['RecordId']))
            continue
        _append_posting(paragraph, info)
        document.save(OUTPUT_PATH)


def main():
    """Look up the total number of matches and crawl every result page."""
    first_page = _post(LIST_URL, _list_query(0))
    record_count = int((first_page.get('Data') or {}).get('RecordCount') or 0)
    # Stepping by PAGE_SIZE also covers the final partial page, which
    # ``round(count / 10)`` used to drop.
    for page, index in enumerate(range(0, record_count, PAGE_SIZE)):
        print("爬取页数:" + str(page))
        list(index)


if __name__ == "__main__":
    main()