UserCreateMain.java
import java.util.ArrayList;
import java.util.List;

/**
 * Entry point that generates random {@link UserDoc} instances and hands them
 * to {@link SolrImport} threads in fixed-size chunks.
 */
public class UserCreateMain {

    /**
     * Creates {@code totalDoc} populated documents. Every time
     * {@code docPerThread} documents have accumulated, a new
     * {@link SolrImport} thread is started with that chunk and a fresh list
     * is begun; any leftover documents get one last thread.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final int totalDoc = 3_000_000;
        final int docPerThread = 1_000_000;

        List<SolrDocPojo> pending = new ArrayList<>();
        int created = 0;
        while (created < totalDoc) {
            UserDoc user = new UserDoc();
            user.populateFields();
            pending.add(user);
            created++;

            boolean chunkComplete = (created % docPerThread) == 0;
            if (chunkComplete) {
                SolrImport importer = new SolrImport(pending);
                importer.start();
                // The started thread keeps the old list; continue with a new one.
                pending = new ArrayList<>();
            }
        }

        // Documents left over when totalDoc is not a multiple of docPerThread.
        if (!pending.isEmpty()) {
            SolrImport importer = new SolrImport(pending);
            importer.start();
        }
    }
}
SolrDocPojo.java
import org.apache.solr.common.SolrInputDocument;

/**
 * Contract for plain objects that can fill themselves with data and be
 * turned into a Solr input document.
 */
public interface SolrDocPojo {

    /**
     * Builds the Solr representation of this object.
     *
     * @return a {@link SolrInputDocument} carrying this object's fields
     */
    SolrInputDocument converToSolrDoc();

    /**
     * Fills in this object's fields.
     */
    void populateFields();
}
UserDoc.java
import java.util.HashSet; import java.util.Random; import java.util.Set; import java.util.UUID; import org.apache.solr.common.SolrInputDocument; public class UserDoc implements SolrDocPojo { private String id; private String firstName; private String lastName; private Integer birthYear; private String companyName; private String state; private Set<String> permission = new HashSet<>(); private static final Random RANDOM = new Random(); private static final String[] COMPANIES = {"Google", "FB", "Samsung", "Intel", "Netflex", "Micro", "Zions", "OC Tanner", "GE", "Goldman", "Aegen", "GlaxoSmithKline", "Ford"}; public UserDoc() { super(); id = UUID.randomUUID().toString(); } @Override public SolrInputDocument converToSolrDoc() { SolrInputDocument solrDoc = new SolrInputDocument(); solrDoc.setField("id", id); solrDoc.setField("firstName", firstName); solrDoc.setField("lastName", lastName); solrDoc.setField("birthYear", birthYear); solrDoc.setField("companyName", companyName); solrDoc.setField("state", state); solrDoc.setField("permission", permission); return solrDoc; } @Override public void populateFields() { state = US.randomState().getANSIAbbreviation(); birthYear = 1930 + RANDOM.nextInt(80); companyName = COMPANIES[RANDOM.nextInt(COMPANIES.length)];
int firstSeparator = id.indexOf('-'); firstName = "first" + id.substring(0, firstSeparator); lastName = "last" + id.substring(firstSeparator+1, id.indexOf('-', firstSeparator+1));
int numOfPermission = RANDOM.nextInt(11); //0~10 for(int i=0; i<numOfPermission; i++){ //max 10 permissions permission.add("permission"+RANDOM.nextInt(10)); } } //Getters and Setters are omitted }
SolrImport.java
import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.common.SolrInputDocument; public class SolrImport extends Thread { final int SOLR_BATCH_SIZE = 2000; List<SolrDocPojo> docList = null; public SolrImport(List<SolrDocPojo> docList) { super(); this.docList = docList; } public void run(){ List<SolrInputDocument> inputList = new ArrayList<SolrInputDocument>(); boolean commit = false; for(SolrDocPojo doc: docList){ inputList.add(doc.converToSolrDoc()); if( inputList.size() % SOLR_BATCH_SIZE == 0){ sendToSolr(inputList, commit); inputList.clear(); commit = !commit; System.out.println("sendToSolr executed"); } } if(inputList.size() > 0){ sendToSolr(inputList, true);
inputList.clear(); } System.out.println("done"); } private void sendToSolr(List<SolrInputDocument> docList, boolean commit) { try { SolrEndPoint.client.add(docList); if(commit) SolrEndPoint.client.commit(); } catch (SolrServerException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
US.java
This enum defines all US state names. Except for the 'randomState' method (and the fields it uses), this code is taken from https://github.com/AustinC/UnitedStates/blob/master/src/main/java/unitedstates/US.java
import java.util.Arrays; import java.util.List; import java.util.Random; public enum US { ALABAMA("Alabama","AL","US-AL"), ALASKA("Alaska","AK","US-AK"), ARIZONA("Arizona","AZ","US-AZ"), ARKANSAS("Arkansas","AR","US-AR"), CALIFORNIA("California","CA","US-CA"), COLORADO("Colorado","CO","US-CO"), CONNECTICUT("Connecticut","CT","US-CT"), //Omitted WYOMING("Wyoming","WY","US-WY"), PUERTO_RICO("Puerto Rico","PR","US-PR"); private static final List<US> VALUES = Arrays.asList(values()); private static final int SIZE = VALUES.size(); private static final Random RANDOM = new Random(); public static US randomState() { return VALUES.get(RANDOM.nextInt(SIZE)); } //Omitted }
SolrEndPoint.java
import org.apache.solr.client.solrj.impl.HttpSolrClient;

/**
 * Holds the single {@link HttpSolrClient} used by the import code.
 */
public class SolrEndPoint {

    /** Base URL of the Solr core/collection the client talks to. */
    private static final String SOLR_URL = "http://localhost:8983/solr/schemaless";

    /** Client instance built once when this class is initialized. */
    static final HttpSolrClient client = new HttpSolrClient.Builder(SOLR_URL).build();
}
No comments:
Post a Comment