本文共 10491 字,大约阅读时间需要 34 分钟。
注意:JSON 读取已支持中文;写出的 CSV 为 UTF-8 编码,用 UltraEdit 打开中文显示正常。但如要在 Excel 中正确显示中文,需要另行把 CSV 文件另存为 ANSI 编码(即本地代码页,如 GBK;UltraEdit 菜单中称为 "ASCII")——纯 ASCII 本身无法表示中文。
package utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Converts a file holding one JSON resume record per line into a CSV file.
 *
 * <p>Every record becomes one CSV row: the six resume fields followed by eight
 * fields for each entry of {@code workExperienceList}. The header row written by
 * {@link #prepareHeadLine()} names columns for 15 experience entries; a record
 * with more entries produces a row that is wider than the header.
 */
public class ReadJson {
    /** CSV field separator. */
    final static String CO = ",";
    /** CSV record terminator. */
    final static String BR = "\r\n";
    final static String READPATH = "D:/personal/knowledge/dataMining/dataset/castle/testdata/practice_10000.json";
    final static String WRITEPATH = "D:/personal/knowledge/dataMining/dataset/castle/testdata/practice_10000.csv";

    /**
     * Reads {@link #READPATH}, parses every non-blank line as a JSON resume and
     * writes all resumes to {@link #WRITEPATH}.
     *
     * <p>Any parse failure aborts the run before the CSV is written; the stack
     * trace is printed.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Resume> resumeList = new ArrayList<>();
        try {
            for (String data : ReadFile(READPATH)) {
                // A blank line (e.g. a trailing newline) is not a record; parsing it would throw.
                if (data.trim().isEmpty()) {
                    continue;
                }
                resumeList.add(parseResume(new JSONObject(data)));
            }
            System.out.println("resume size:" + resumeList.size());
            writeCsv(resumeList, WRITEPATH);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds a {@code Resume} (including its experience list) from one parsed JSON record. */
    private static Resume parseResume(JSONObject jsonObj) {
        Resume resume = new Resume();
        resume.setOid(jsonObj.getJSONObject("_id").getString("$oid"));
        resume.setAge(text(jsonObj, "age"));
        // "degree" is a JSON number in the sample data; getInt accepts numbers and numeric strings.
        resume.setDegree(jsonObj.getInt("degree"));
        resume.setGender(text(jsonObj, "gender"));
        resume.setId(text(jsonObj, "id"));
        resume.setMajor(text(jsonObj, "major"));

        List<Exp> expList = new ArrayList<>();
        JSONArray expJsonList = jsonObj.getJSONArray("workExperienceList");
        for (int i = 0; i < expJsonList.length(); i++) {
            expList.add(parseExp(expJsonList.getJSONObject(i)));
        }
        resume.setExp(expList);
        return resume;
    }

    /** Builds one {@code Exp} from an element of {@code workExperienceList}. */
    private static Exp parseExp(JSONObject expJsonObj) {
        Exp exp = new Exp();
        exp.setDepartment(text(expJsonObj, "department"));
        exp.setEnd_date(text(expJsonObj, "end_date"));
        exp.setIndustry(text(expJsonObj, "industry"));
        exp.setPosition_name(text(expJsonObj, "position_name"));
        exp.setSalary(expJsonObj.getInt("salary"));
        exp.setSize(expJsonObj.getInt("size"));
        exp.setStart_date(text(expJsonObj, "start_date"));
        exp.setType(text(expJsonObj, "type"));
        return exp;
    }

    /**
     * Returns the value stored under {@code key} as text, or {@code null} when the
     * key is absent or holds JSON {@code null} (as "department" does in the sample data).
     */
    private static String text(JSONObject obj, String key) {
        return obj.isNull(key) ? null : String.valueOf(obj.get(key));
    }

    /**
     * Reads a text file into a list of its lines, decoding it as UTF-8.
     *
     * <p>On an I/O error the stack trace is printed and the lines read so far are returned.
     *
     * @param path file to read
     * @return the lines of the file, without line terminators; never {@code null}
     */
    public static List<String> ReadFile(String path) {
        List<String> strList = new ArrayList<>();
        // Explicit charset: FileReader would use the platform default and garble Chinese text
        // on machines whose default is not UTF-8. try-with-resources closes the reader once.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(path)), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                strList.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return strList;
    }

    /**
     * Writes the header row and one row per resume to {@code path} as UTF-8 CSV.
     *
     * <p>On an I/O error (including a file that cannot be opened) the stack trace is
     * printed and writing stops; the writer is always closed.
     *
     * @param resumeList resumes to write
     * @param path       output file; overwritten if it exists
     */
    public static void writeCsv(List<Resume> resumeList, String path) {
        // try-with-resources: the original finally-block dereferenced a null writer
        // (NullPointerException) whenever the output file could not be opened.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(new File(path)), StandardCharsets.UTF_8))) {
            bw.write(prepareHeadLine());
            for (Resume res : resumeList) {
                StringBuilder line = new StringBuilder();
                line.append(quote(res.getOid())).append(CO)
                    .append(quote(res.getAge())).append(CO)
                    .append(res.getDegree()).append(CO)
                    .append(quote(res.getGender())).append(CO)
                    .append(quote(res.getId())).append(CO)
                    .append(quote(res.getMajor()));
                // Iterate as Object and cast so this works whether getExp() is declared
                // as a raw List or as List<Exp>.
                for (Object item : res.getExp()) {
                    Exp exp = (Exp) item;
                    line.append(CO).append(quote(exp.getDepartment()))
                        .append(CO).append(quote(exp.getEnd_date()))
                        .append(CO).append(quote(exp.getIndustry()))
                        .append(CO).append(quote(exp.getPosition_name()))
                        .append(CO).append(exp.getSalary())
                        .append(CO).append(exp.getSize())
                        .append(CO).append(quote(exp.getStart_date()))
                        .append(CO).append(quote(exp.getType()));
                }
                line.append(BR);
                bw.write(line.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the CSV header row: the six resume columns followed by the eight
     * experience columns repeated 15 times, terminated by {@link #BR}.
     *
     * @return the header row including its line terminator
     */
    public static String prepareHeadLine() {
        StringBuilder headLine = new StringBuilder("oid,age,degree,gender,id,major,");
        String expHeadLine = "department,end_date,industry,position_name,salary,size,start_date,type,";
        for (int i = 0; i < 15; i++) {
            headLine.append(expHeadLine);
        }
        // Drop the trailing comma left by the last repeated group.
        headLine.setLength(headLine.length() - 1);
        headLine.append(BR);
        return headLine.toString();
    }

    /**
     * Wraps a value in double quotes for use as a CSV field.
     *
     * <p>Embedded double quotes are doubled so the field stays well-formed, and
     * {@code null} yields an empty quoted field rather than the text {@code null}.
     *
     * @param subject field value; may be {@code null}
     * @return the quoted field
     */
    public static String quote(String subject) {
        if (subject == null) {
            return "\"\"";
        }
        return '"' + subject.replace("\"", "\"\"") + '"';
    }
}
Sample JSON file (三行):
{"_id":{"$oid":"558d2bb91dc76a2178188c3e"},"age":"29","degree":1,"gender":"男","id":"558d2bb91dc76a2178188c3e","major":"电子商务","workExperienceList":[{"department":null,"end_date":"2015-03","industry":"计算机硬件 ","position_name":"电子商务经理","salary":4,"size":6,"start_date":"2014-11","type":"-"},{"department":null,"end_date":"2014-10","industry":"互联网/电子商务 ","position_name":"运营经理","salary":3,"size":4,"start_date":"2012-08","type":"-"},{"department":null,"end_date":"2012-07","industry":"房地产/建筑/建材/工程 ","position_name":"运营经理","salary":2,"size":4,"start_date":"2010-07","type":"-"},{"department":null,"end_date":"2010-05","industry":"互联网/电子商务 ","position_name":"运营专员","salary":2,"size":7,"start_date":"2009-11","type":"-"}]}
{"_id":{"$oid":"558d2bb91dc76a2178188c40"},"age":"28","degree":1,"gender":"男","id":"558d2bb91dc76a2178188c40","major":"计算机科学与技术","workExperienceList":[{"department":null,"end_date":"至今","industry":"IT服务(系统/数据/维护) ","position_name":"技术支持","salary":1,"size":4,"start_date":"2014-10","type":"-"},{"department":null,"end_date":"2014-08","industry":"基金/证券/期货/投资 ","position_name":"信息技术专员","salary":2,"size":2,"start_date":"2014-03","type":"-"},{"department":null,"end_date":"2014-03","industry":"房地产/建筑/建材/工程 ","position_name":"信息技术专员","salary":1,"size":7,"start_date":"2011-11","type":"-"}]}
{"_id":{"$oid":"558d2bba1dc76a2178188c6f"},"age":"29","degree":1,"gender":"男","id":"558d2bba1dc76a2178188c6f","major":"计算机信息管理","workExperienceList":[{"department":null,"end_date":"至今","industry":"计算机软件 ","position_name":"软件测试","salary":3,"size":7,"start_date":"2013-11","type":"-"},{"department":null,"end_date":"2013-11","industry":"计算机软件 ","position_name":"软件测试","salary":2,"size":6,"start_date":"2012-11","type":"-"},{"department":null,"end_date":"2012-06","industry":"计算机软件 ","position_name":"开发工程师","salary":2,"size":6,"start_date":"2009-12","type":"-"}]}
JSON结构:
$ _id               :List of 1
 ..$ $oid: chr "558d2bb91dc76a2178188c3e"
$ age               : chr "29"
$ degree            : num 1
$ gender            : chr "男"
$ id                : chr "558d2bb91dc76a2178188c3e"
$ major             : chr "电子商务"
$ workExperienceList:List of 4
 ..$ :List of 8
 .. ..$ department   : NULL
 .. ..$ end_date     : chr "2015-03"
 .. ..$ industry     : chr "计算机硬件 "
 .. ..$ position_name: chr "电子商务经理"
 .. ..$ salary       : num 4
 .. ..$ size         : num 6
 .. ..$ start_date   : chr "2014-11"
 .. ..$ type         : chr "-"
 ..$ :List of 8
 .. ..$ department   : NULL
 .. ..$ end_date     : chr "2014-10"
 .. ..$ industry     : chr "互联网/电子商务 "
 .. ..$ position_name: chr "运营经理"
 .. ..$ salary       : num 3
 .. ..$ size         : num 4
 .. ..$ start_date   : chr "2012-08"
 .. ..$ type         : chr "-"
。。。
辅助类参考:
- Exp
public class Exp { String department; String end_date; String industry; String position_name; Integer salary; Integer size; String start_date; String type; ...
- Resume
public class Resume { String oid; String age; Integer degree; String gender; String id; String major; List<Exp> exp; ...
- 附上CSV读取(同时解决中文支持问题)
public static List- 附上CSV更新(更新文件其实就是 读取-append-写入)readCsv(String path) { List posRawList = new ArrayList (); File csv = new File(path); InputStreamReader isr = null; BufferedReader br = null; try { isr = new InputStreamReader(new FileInputStream(csv), "UTF-8"); br = new BufferedReader(isr); String line = ""; try { int lineIdx = 1; while ((line = br.readLine()) != null) { StringTokenizer st = new StringTokenizer(line, ","); int tokenIdx = 1; while (st.hasMoreTokens()) { String nextToken = st.nextToken(); if (lineIdx != 1 && tokenIdx == 10) { // only process position field (exclude the header) System.out.print(nextToken + "\t"); posRawList.add(nextToken); } tokenIdx += 1; } lineIdx += 1; } } catch (IOException e) { e.printStackTrace(); } } catch (UnsupportedEncodingException e1) { e1.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } finally { try { if (br!=null) br.close(); if (isr!=null) isr.close(); } catch (IOException e) { e.printStackTrace(); } } return posRawList; }
public static void updateCsv(ListposGradeList, String path) { StringBuffer nContent = new StringBuffer(); InputStreamReader isr = null; BufferedReader br = null; FileOutputStream fileOs = null; try { isr = new InputStreamReader(new FileInputStream(path), "UTF-8"); br = new BufferedReader(isr); String line = ""; try { int lineIdx = 0; while((line = br.readLine()) != null){ nContent.append(line); if(lineIdx != 0 && lineIdx < posGradeList.size()){ // append position grade field (exclude the header) nContent.append(",\""+posGradeList.get(lineIdx)+"\""); } nContent.append("\r\n"); lineIdx += 1; } } catch (IOException e) { e.printStackTrace(); } } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } finally { try { if (br!=null) br.close(); if (isr!=null) isr.close(); } catch (IOException e) { e.printStackTrace(); } } try { fileOs = new FileOutputStream(new File(path), false); try { fileOs.write(nContent.toString().getBytes()); } catch (IOException e) { e.printStackTrace(); } } catch (FileNotFoundException e) { e.printStackTrace(); } finally { try { if (fileOs!=null) fileOs.close(); } catch (IOException e) { e.printStackTrace(); } } }
转载地址:http://bbili.baihongyu.com/