Java如何实现获取行政区划
短信预约 -IT技能 免费直播动态提醒
今天小编给大家分享一下Java如何实现获取行政区划的相关知识点,内容详细,逻辑清晰。相信大部分人都还不太了解这方面的知识,所以分享这篇文章给大家参考一下,希望大家阅读完这篇文章后有所收获,下面我们一起来了解一下吧。
一、导入jar包
下面是笔者用到的全部jar包
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.9</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> <version>30.1.1-jre</version> </dependency> <dependency> <groupId>cn.hutool</groupId> <artifactId>hutool-json</artifactId> <version>5.4.0</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.44</version> </dependency> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.14.3</version> </dependency> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.5</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> <version>2.5.4</version> </dependency>
这里说下,maven最好配置为从阿里云镜像下载jar,若是从中央仓库下载将会非常慢。
二、代码展示
这里是代码的展示。代码是笔者在网上搜到后改造的:原代码会触发网站的反爬机制,大概爬取2000条左右就会中断;笔者加了延时,这样就避开了反爬(可能还有别的规避措施)。这里爬取的是4级行政区划:省、市、区县、街道。
package com.cheng.controller;import org.apache.poi.ss.usermodel.Row;import org.apache.poi.xssf.streaming.SXSSFSheet;import org.apache.poi.xssf.streaming.SXSSFWorkbook;import org.jsoup.Jsoup;import org.jsoup.nodes.Document;import org.jsoup.select.Elements;import java.io.FileOutputStream;import java.io.IOException;import java.net.ConnectException;import java.net.SocketTimeoutException;import java.util.*;public class JsoupTestPluMdm {static int i = 1;static String url1 = “http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022”;static String url2 = “”;public static void main(String[] args) throws IOException { try{ List<Map<String,String>> listMap = new ArrayList<>(); Document document = Jsoup.connect(url1) .header("Cookie", "wzws_sessionid=oGQAAyWBMmNlMWZkgjdlZDJkMIAyMjEuMjM4LjEzMi41MA==; SF_cookie_1=15502425; wzws_cid=6e8cdc0aea81349b05c8a0b6c05cd7204b6e0f10e5a48d462175473d23abcb4891edf1ceb73464398cb1ce7e6f53999f7545dd0014a15b1fb4eec5c6cf37421f0c2b08528de36f728ec4c676ed264c7d") .get(); //获取他所有的省 Elements elements = document.select("body > table:nth-child(3) > tbody > tr:nth-child(1) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody"); //解析省的超链接 Elements elements1 = elements.select("tbody > tr > td > a"); for(int j=0;j<elements1.size();j++){// Thread.sleep(100);String s = elements1.get(j).select(“a”).attr(“href”);String provinceCode = s.replaceAll(“.html”,“”)+“0000”;System.out.println(“省代码:” + provinceCode);String provinceName = elements1.get(j).text();System.out.println(“省名称:” + provinceName);Map<String,String> map = new HashMap<>();map.put(provinceCode,provinceName);listMap.add(map);} for (int i1 = 0; i1 <31; i1++) { System.out.println("**********************i********************:"+i); if(i%1000==0){ Thread.sleep(1000*60*10); } Map<String, String> stringStringMap = listMap.get(i1); Iterator<Map.Entry<String, String>> iterator = stringStringMap.entrySet().iterator(); while(iterator.hasNext()){ Map.Entry<String,String> entry = 
iterator.next(); String provinceCode = entry.getKey(); String provinceName = entry.getValue(); String index = provinceCode.substring(0,2)+".html"; SXSSFWorkbook wb = new SXSSFWorkbook(100); SXSSFSheet sheet = (SXSSFSheet) wb.createSheet(); // TODO 这里改成自己的地址即可,也可以存放到一个文件里 String enterFileName = "C:\\Users\\pcc\\Desktop\\xingzhengquhua\\"+provinceName+".xlsx"; FileOutputStream fileOut = new FileOutputStream(enterFileName); Row row = sheet.createRow(0); sheet.createRow(i).createCell(0).setCellValue(provinceCode);// id sheet.getRow(i).createCell(1).setCellValue(provinceName);// name sheet.getRow(i).createCell(2).setCellValue(""); // pid sheet.getRow(i).createCell(3).setCellValue("1"); // type i++; try { jsoupList2(url1 + "/" + index, provinceName, provinceCode, sheet); } catch (SocketTimeoutException e) { e.printStackTrace(); jsoupList2(url1 + "/" + index, provinceName, provinceCode, sheet); } catch (ConnectException e) { e.printStackTrace(); jsoupList2(url1 + "/" + index, provinceName, provinceCode, sheet); } row.createCell(0).setCellValue("id"); row.createCell(1).setCellValue("district_name"); row.createCell(2).setCellValue("pid"); row.createCell(3).setCellValue("type"); wb.write(fileOut); fileOut.close(); } } }catch (Exception e){ e.printStackTrace(); }finally { }}//市级页面public static void jsoupList2(String url,String provinceName,String provinceCode,SXSSFSheet sheet) throws Exception { String cityName = ""; String cityCode = ""; url2 = url.replace(".html",""); Document document = Jsoup.connect(url).get(); Elements elements = document.select("body > table:nth-child(3) > tbody > tr:nth-child(1) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody"); Elements elements1 = elements.select("tbody > tr > td"); //j从2开始是因为他有个表头 统计用区划代码 名称 for (int j = 2; j < elements1.size(); j++) { System.out.println("**********************i********************:"+i); if(i%1000==0){ Thread.sleep(1000*60*10); }// 
Thread.sleep(500);//判断是否是超链接,不是超链接也要获取数据if(elements1.get(j).select(“td > a”).toString().equals(“”)){String text = elements1.get(j).text();if (j % 2 == 0) {System.out.println(“市代码:” + text);sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“市名称:” + text);sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(provinceCode);sheet.getRow(i).createCell(3).setCellValue(“3”);i++;}}else {Elements elements2 = elements1.get(j).select(“td > a”);for (int j1 = 0; j1 < elements2.size(); j1++) {String text = elements2.get(j1).text();if (j % 2 == 0) {System.out.println(“市代码:” + text);cityCode = text;sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“市名称:” + text);cityName = text;sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(provinceCode);sheet.getRow(i).createCell(3).setCellValue(“2”);i++;String s = elements2.get(j1).select(“a”).attr(“href”);//TODO 这里排除了海南的几个市区,更改为不排除任何市区if(true) {try {jsoupList3(url1 + “/” + s,cityName,cityCode,provinceName,provinceCode, sheet);} catch (SocketTimeoutException e) {e.printStackTrace();jsoupList3(url1 + “/” + s,cityName,cityCode,provinceName,provinceCode, sheet);} catch (ConnectException e) {e.printStackTrace();jsoupList3(url1 + “/” + s,cityName,cityCode,provinceName,provinceCode, sheet);}}}}}}}//县级页面public static void jsoupList3(String url,String cityName,String cityCode,String provinceName,String provinceCode,SXSSFSheet sheet) throws Exception {Document document = Jsoup.connect(url).get();Elements elements = document.select(“body > table:nth-child(3) > tbody > tr:nth-child(1) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody”);Elements elements1 = elements.select(“tbody > tr > td”);String xianName = “”;String xianCode = “”;//j从2开始是因为他有个表头 统计用区划代码 名称for (int j = 2; j < elements1.size(); j++) {System.out.println(“i:"+i);if(i%1000==0){Thread.sleep(10006010);}// 
Thread.sleep(500);//判断是否是超链接,不是超链接也要获取数据if(elements1.get(j).select(“td > a”).toString().equals(”“)){String text = elements1.get(j).text();if (j % 2 == 0) {System.out.println(“县代码:” + text);sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“县名称:” + text);sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(cityCode);sheet.getRow(i).createCell(3).setCellValue(“3”);i++;}}else {Elements elements2 = elements1.get(j).select(“td > a”);for (int j1 = 0; j1 < elements2.size(); j1++) {String text = elements2.get(j1).text();xianName = text;if (j % 2 == 0) {xianCode = text;System.out.println(“县代码:” + xianCode);sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“县名称:” + text);sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(cityCode);sheet.getRow(i).createCell(3).setCellValue(“3”);i++;String s = elements2.get(j1).select(“a”).attr(“href”);try {jsoupList4(url1 + “/” +provinceCode.substring(0,2)+”/“+ s,xianName,xianCode,cityName,cityCode,provinceName,provinceCode, sheet);} catch (SocketTimeoutException e) {e.printStackTrace();jsoupList4(url1 + “/” +provinceCode.substring(0,2)+”/“+ s,xianName,xianCode,cityName,cityCode,provinceName,provinceCode, sheet);} catch (ConnectException e) {e.printStackTrace();jsoupList4(url1 + “/” +provinceCode.substring(0,2)+”/"+ s,xianName,xianCode,cityName,cityCode,provinceName,provinceCode, sheet);}}}}}}//街道页面public static void jsoupList4(String url,String xianName,String xianCode,String cityName,String cityCode,String provinceName,String provinceCode,SXSSFSheet sheet) throws Exception { Document document = Jsoup.connect(url).get(); Elements elements = document.select("body > table:nth-child(3) > tbody > tr:nth-child(1) > td > table > tbody > tr:nth-child(2) > td > table > tbody > tr > td > table > tbody"); Elements elements1 = elements.select("tbody > tr > td"); //j从2开始是因为他有个表头 统计用区划代码 名称 for (int j = 2; j < 
elements1.size(); j++) { System.out.println("**********************i********************:"+i); if(i%1000==0){ Thread.sleep(1000*60*10); }// Thread.sleep(500);//判断是否是超链接,不是超链接也要获取数据if(elements1.get(j).select(“td > a”).toString().equals(“”)){String text = elements1.get(j).text();if (j % 2 == 0) {System.out.println(“街道代码:” + text);sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“街道名称:” + text);sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(xianCode);sheet.getRow(i).createCell(3).setCellValue(“4”);i++;}}else {Elements elements2 = elements1.get(j).select(“td > a”);for (int j1 = 0; j1 < elements2.size(); j1++) {String text = elements2.get(j1).text();if (j % 2 == 0) {System.out.println(“街道代码:” + text);// TODO 这里不能截取,不然街道界别数据截不全sheet.createRow(i).createCell(0).setCellValue(text);} else {System.out.println(“街道名称:” + text);sheet.getRow(i).createCell(1).setCellValue(text);sheet.getRow(i).createCell(2).setCellValue(xianCode);sheet.getRow(i).createCell(3).setCellValue(“4”);i++;}}}}}}
以上就是“Java如何实现获取行政区划”这篇文章的所有内容,感谢各位的阅读!相信大家阅读完这篇文章都有很大的收获,小编每天都会为大家更新不同的知识,如果还想学习更多的知识,请关注编程网行业资讯频道。
免责声明:
① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。
② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341