|
import json
import time

import pymysql
import requests

# --- Database setup: create the table that stores the crawled POI data ---

# Open the database connection. Keyword arguments are used because
# PyMySQL 1.0+ no longer accepts positional connection parameters.
# Replace the placeholder values with your own host / user / password / schema.
db = pymysql.connect(
    host="您的主机ip",
    user="用户名",
    password="密码",
    database="数据库名",
    charset="utf8",
)
# Create the cursor used for every statement in this script.
cursor = db.cursor()
# Start from a clean table: drop it if it already exists.
cursor.execute("drop table if exists Community")
# SQL statement that creates the table.
sql = '''
    create table Community(
        i int(8) not null AUTO_INCREMENT,
        id varchar(500),
        biz_type varchar(500),
        name varchar(500),
        type varchar(500),
        address varchar(500),
        tel varchar(500),
        location varchar(500),
        p_code varchar(500),
        p_name varchar(500),
        city_code varchar(500),
        city_name varchar(500),
        ad_code varchar(500),
        ad_name varchar(500),
        business_area varchar(500),
        PRIMARY KEY(i)
    )engine=MyISAM AUTO_INCREMENT=1 default CHARSET=utf8;
'''
cursor.execute(sql)

# Total number of records reported by the API for the city being crawled.
# get_data() fills it in from the first response; the driver resets it to 0
# before each city.
total_record = 0

# Keys read from each POI object, in the same order as the columns of the
# INSERT statement below.
_POI_FIELDS = (
    'id', 'biz_type', 'name', 'type', 'address', 'tel', 'location',
    'pcode', 'pname', 'citycode', 'cityname', 'adcode', 'adname',
    'business_area',
)

_INSERT_SQL = (
    "INSERT INTO Community(id,biz_type,name,type,address,tel,location,"
    "p_code,p_name,city_code,city_name,ad_code,ad_name,business_area) VALUES "
    "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def get_data(page_index, url_amap):
    """Fetch one result page and insert its POIs into the Community table.

    Args:
        page_index: Page number to request.
        url_amap: URL template containing the literal text ``page_index``,
            which is replaced by the page number.

    Returns:
        The value of the ``pois`` key of the JSON response (``None`` when the
        key is absent).

    Side effects:
        Sleeps 0.5 s before the request, inserts one row per POI through the
        module-level ``cursor``, and sets the global ``total_record`` from the
        response's ``count`` field while it is still 0.
    """
    global total_record
    time.sleep(0.5)
    print('解析页码: ' + str(page_index) + '... ...')
    # Build the URL of the requested page.
    url = url_amap.replace('page_index', str(page_index))
    # Send the request and decode the JSON payload.
    response = requests.get(url)
    poi_json = response.json()

    if total_record == 0:
        total_record = int(poi_json.get('count', 0))

    poi_lists = poi_json.get("pois")
    # A missing or empty result yields nothing to insert.
    if poi_lists:
        for poi in poi_lists:
            # Every value is stringified, exactly as stored by the original
            # script (a missing field becomes the text "None").
            row = tuple(str(poi.get(field)) for field in _POI_FIELDS)
            # Add the record to the MySQL table.
            cursor.execute(_INSERT_SQL, row)

    return poi_lists


def getPOIdata(page_size, url_amap):
    """Crawl every result page for one URL template.

    Page 1 is fetched first so that ``total_record`` is known; the remaining
    pages 2..ceil(total_record / page_size) are then fetched in order.

    Args:
        page_size: Number of records per page (must match the ``offset``
            parameter in ``url_amap``).
        url_amap: URL template passed through to ``get_data``.
    """
    print("获取POI数据开始")
    get_data(1, url_amap)

    # Ceiling division: the original compared the quotient (not the
    # remainder) with 0 and therefore requested one extra, empty page
    # whenever the total was an exact multiple of the page size.
    last_page = (total_record + page_size - 1) // page_size
    for each_page in range(2, last_page + 1):
        get_data(each_page, url_amap)


def _key_index_for(position):
    """Return the index of the API key to use for the city at *position*.

    Cities 0-1900 use key 0; after that every block of 100 cities
    (1901-2000, 2001-2100, ... 3501-3600) uses the next key, up to key 17.
    Returns ``None`` for positions outside 0-3600, which are skipped.
    """
    if position < 0 or position > 3600:
        return None
    if position <= 1900:
        return 0
    return (position - 1901) // 100 + 1


def _build_url(api_key, adcode):
    """Return the place-text search URL template for one city.

    The literal ``page_index`` in the result is substituted by ``get_data``.
    """
    return ('http://restapi.amap.com/v3/place/text?key=' + api_key
            + '&keywords=&types=120302'
            + '&city=' + adcode
            + '&citylimit=true&children=1&offset=20&page=page_index&extensions=all')


if __name__ == '__main__':
    try:
        # Load the list of city adcodes to crawl.
        with open("city.json", encoding="utf-8-sig") as data:
            strJson = json.load(data)
        city = [entry['adcode'] for entry in strJson]

        # API keys. A free key allows a limited number of calls per day
        # (2000 according to the original author), so several keys are
        # listed and rotated by city position. Replace the placeholder with
        # your own keys; up to 18 entries are used.
        key = [
            "您的API密匙",
        ]

        page_size = 20
        for y, adcode in enumerate(city):
            print(y)
            key_index = _key_index_for(y)
            if key_index is None:
                continue
            url_amap = _build_url(key[key_index], adcode)
            # Reset the per-city record count before crawling.
            total_record = 0
            getPOIdata(page_size, url_amap)
    finally:
        # Close the cursor and the connection even if the crawl fails.
        cursor.close()
        db.close()
下载地址:本地下载