1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
"""Crawl Amap place-text POIs (type code 120302) into a MySQL table.

Loading this module connects to MySQL and recreates the ``Community`` table.
Running it as a script then walks every ``adcode`` listed in ``city.json``,
requests all result pages for that district and stores each POI as one row.
"""
import json
import math
import time

import pymysql
import requests

# Open the database connection.  Replace the placeholder strings with real
# credentials.  Keyword arguments are used because PyMySQL 1.0+ no longer
# accepts positional connection parameters.
db = pymysql.connect(
    host="您的主机ip",
    user="用户名",
    password="密码",
    database="数据库名",
    charset="utf8",
)
# Cursor shared by the table setup below and by get_data().
cursor = db.cursor()

# Start every run from an empty table: drop it if it already exists.
cursor.execute("drop table if exists Community")

# Table definition: an auto-increment surrogate key plus one text column per
# POI field that get_data() stores.
sql = '''
create table Community(
    i int(8) not null AUTO_INCREMENT,
    id varchar(500),
    biz_type varchar(500),
    name varchar(500),
    type varchar(500),
    address varchar(500),
    tel varchar(500),
    location varchar(500),
    p_code varchar(500),
    p_name varchar(500),
    city_code varchar(500),
    city_name varchar(500),
    ad_code varchar(500),
    ad_name varchar(500),
    business_area varchar(500),
    PRIMARY KEY(i)
)engine=MyISAM AUTO_INCREMENT=1 default CHARSET=utf8;
'''
cursor.execute(sql)

# Number of POIs the API reports for the query currently being crawled.
# get_data() fills it from the first response it sees while the value is 0;
# the driver resets it to 0 before each city.
total_record = 0

# JSON keys read from each POI, in the same order as the INSERT column list.
_POI_FIELDS = (
    'id', 'biz_type', 'name', 'type', 'address', 'tel', 'location',
    'pcode', 'pname', 'citycode', 'cityname', 'adcode', 'adname',
    'business_area',
)

_INSERT_SQL = (
    "INSERT INTO Community(id,biz_type,name,type,address,tel,location,"
    "p_code,p_name,city_code,city_name,ad_code,ad_name,business_area) VALUES "
    "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)

# City positions 0..1900 use the first API key; after that every further
# block of 100 positions uses the next key, up to position 3600.
_FIRST_KEY_LAST_POSITION = 1900
_POSITIONS_PER_EXTRA_KEY = 100
_LAST_POSITION = 3600


def get_data(page_index, url_amap):
    """Fetch one result page and insert its POIs into the Community table.

    Args:
        page_index: Page number to request.
        url_amap: Request URL containing the literal text ``page_index``,
            which is replaced by the page number.

    Returns:
        The ``pois`` value of the JSON response (``None`` when the key is
        absent).
    """
    global total_record
    time.sleep(0.5)  # pause between requests
    print('解析页码: ' + str(page_index) + '... ...')

    url = url_amap.replace('page_index', str(page_index))
    poi_json = requests.get(url).json()

    # Only the first response for a query sets the total; later pages keep it.
    if total_record == 0:
        total_record = int(poi_json.get('count', 0))

    poi_lists = poi_json.get("pois")
    if poi_lists:
        for poi in poi_lists:
            # Every value is stored as text; a missing key becomes 'None',
            # exactly as str(None) yields.
            row = tuple(str(poi.get(field)) for field in _POI_FIELDS)
            cursor.execute(_INSERT_SQL, row)
    return poi_lists


def getPOIdata(page_size, url_amap):
    """Crawl every result page for one query URL.

    Page 1 is fetched first so that ``total_record`` is known; the remaining
    pages 2..ceil(total_record / page_size) are fetched afterwards.  The
    caller must reset ``total_record`` to 0 before calling.

    Args:
        page_size: Number of POIs per page (must match the URL's ``offset``).
        url_amap: Request URL containing the ``page_index`` placeholder.
    """
    print("获取POI数据开始")
    get_data(1, url_amap)
    # Round up so a partial last page is included, without requesting an
    # extra empty page when the total is an exact multiple of page_size.
    page_count = math.ceil(total_record / page_size)
    for each_page in range(2, page_count + 1):
        get_data(each_page, url_amap)


def _key_index_for(position):
    """Return the API-key index for the city at *position*, or None if out of range."""
    if 0 <= position <= _FIRST_KEY_LAST_POSITION:
        return 0
    if position <= _LAST_POSITION and position > _FIRST_KEY_LAST_POSITION:
        offset = position - (_FIRST_KEY_LAST_POSITION + 1)
        return offset // _POSITIONS_PER_EXTRA_KEY + 1
    return None


def _build_url(api_key, adcode):
    """Return the place-text request URL for one district, page placeholder included."""
    return (
        'http://restapi.amap.com/v3/place/text?key=' + api_key
        + '&keywords=&types=120302' + '&city=' + adcode
        + '&citylimit=true&children=1&offset=20&page=page_index&extensions=all'
    )


def main():
    """Crawl every district listed in city.json, then close the database."""
    global total_record

    with open("city.json", encoding="utf-8-sig") as data:
        strJson = json.load(data)
    city = [entry['adcode'] for entry in strJson]

    # API key list.  A free key allows 2000 requests per day, so several keys
    # are used in turn.  Replace the placeholder below with your own keys as
    # quoted strings (up to 18 are addressed by _key_index_for).
    key = [
        您的API密匙
    ]

    page_size = 20
    try:
        for y, adcode in enumerate(city):
            print(y)
            key_index = _key_index_for(y)
            if key_index is None:
                # No key is assigned to positions beyond the last range.
                continue
            url_amap = _build_url(key[key_index], adcode)
            total_record = 0  # total number of records for this city
            getPOIdata(page_size, url_amap)
            # Harmless with MyISAM; keeps the rows if the table engine is
            # ever switched to a transactional one.
            db.commit()
    finally:
        # Release the cursor and the connection even if the crawl fails.
        cursor.close()
        db.close()


if __name__ == '__main__':
    main()
下载地址:本地下载