Monday, July 24, 2017

Solr 6: Adding Documents using a SolrJ Client Application

I created a SolrJ client application to populate the documents shown in a previous post about Solr schemaless mode.  This application needs all of the jar files required by SolrJ applications on its classpath: solr-solrj-6.6.0.jar in the <SolrInstalled>/dist directory and all jar files under the <SolrInstalled>/dist/solrj-lib directory.

UserCreateMain.java

import java.util.ArrayList;
import java.util.List;

/**
 * Entry point of the demo: creates user documents and hands them to
 * {@link SolrImport} threads in chunks of {@code docPerThread} documents.
 */
public class UserCreateMain {

  public static void main(String[] args) {
    final int totalDoc = 3_000_000;
    final int docPerThread = 1_000_000;

    List<SolrDocPojo> pending = new ArrayList<>();
    int generated = 0;
    while (generated < totalDoc) {
      UserDoc user = new UserDoc();
      user.populateFields();
      pending.add(user);
      generated++;

      // A full chunk gets its own import thread. A new list is started so the
      // list already handed to that thread is not modified afterwards.
      boolean chunkFull = (generated % docPerThread == 0);
      if (chunkFull) {
        SolrImport worker = new SolrImport(pending);
        worker.start();
        pending = new ArrayList<>();
      }
    }

    // Import the remainder when totalDoc is not a multiple of docPerThread.
    if (!pending.isEmpty()) {
      SolrImport worker = new SolrImport(pending);
      worker.start();
    }
  }
}

SolrDocPojo.java
import org.apache.solr.common.SolrInputDocument;

/**
 * A plain object that can be converted into a Solr input document.
 */
public interface SolrDocPojo {

  /**
   * Builds the Solr representation of this object.
   *
   * @return a {@link SolrInputDocument} holding this object's field values
   */
  SolrInputDocument converToSolrDoc();

  /**
   * Populates this object's fields; the implementation decides where the
   * values come from.
   */
  void populateFields();
}

UserDoc.java
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.UUID;

import org.apache.solr.common.SolrInputDocument;

/**
 * A user record with randomly generated field values, convertible to a
 * {@link SolrInputDocument}.
 */
public class UserDoc implements SolrDocPojo {
  private static final Random RANDOM = new Random();
  private static final String[] COMPANIES = {
      "Google", "FB", "Samsung", "Intel", "Netflex", "Micro", "Zions",
      "OC Tanner", "GE", "Goldman", "Aegen", "GlaxoSmithKline", "Ford"
  };

  private String id;
  private String firstName;
  private String lastName;
  private Integer birthYear;
  private String companyName;
  private String state;
  private Set<String> permission = new HashSet<>();

  /** Creates a user whose id is a freshly generated random UUID string. */
  public UserDoc() {
    this.id = UUID.randomUUID().toString();
  }

  /**
   * Copies every field of this user into a new Solr input document, using
   * the Java field name as the Solr field name.
   *
   * @return the populated document
   */
  @Override
  public SolrInputDocument converToSolrDoc() {
    SolrInputDocument document = new SolrInputDocument();
    document.setField("id", id);
    document.setField("firstName", firstName);
    document.setField("lastName", lastName);
    document.setField("birthYear", birthYear);
    document.setField("companyName", companyName);
    document.setField("state", state);
    document.setField("permission", permission);
    return document;
  }

  /**
   * Fills in state, birth year, company, names and permissions. Names are
   * derived from the id; everything else is chosen at random.
   */
  @Override
  public void populateFields() {
    state = US.randomState().getANSIAbbreviation();
    birthYear = 1930 + RANDOM.nextInt(80);
    int companyIndex = RANDOM.nextInt(COMPANIES.length);
    companyName = COMPANIES[companyIndex];

    // The names reuse the first two '-'-separated groups of the id.
    int firstDash = id.indexOf('-');
    firstName = "first" + id.substring(0, firstDash);
    int secondDash = id.indexOf('-', firstDash + 1);
    lastName = "last" + id.substring(firstDash + 1, secondDash);

    // Draw 0 to 10 permission names; duplicates collapse in the set, so the
    // resulting set may hold fewer entries than draws.
    int draws = RANDOM.nextInt(11);
    for (int remaining = draws; remaining > 0; remaining--) {
      permission.add("permission" + RANDOM.nextInt(10));
    }
  }

  //Getters and Setters are omitted
}

SolrImport.java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;


/**
 * Worker thread that converts a list of {@link SolrDocPojo} objects into
 * Solr input documents and sends them to {@code SolrEndPoint.client} in
 * batches of {@code SOLR_BATCH_SIZE}.
 *
 * <p>Commits are issued on every other full batch, and the run always ends
 * with a commit when anything sent by this thread is still uncommitted.
 */
public class SolrImport extends Thread {
  final int SOLR_BATCH_SIZE = 2000;

  List<SolrDocPojo> docList = null;

  /**
   * @param docList the documents this thread will import; the list is read
   *                when the thread runs and is not copied
   */
  public SolrImport(List<SolrDocPojo> docList) {
    super();
    this.docList = docList;
  }

  /**
   * Sends all documents in batches, alternating between non-committing and
   * committing sends, then makes sure the final state is committed.
   */
  @Override
  public void run(){
    List<SolrInputDocument> inputList =
        new ArrayList<SolrInputDocument>();

    boolean commit = false;
    // True while documents sent by this thread may not have been committed yet.
    boolean uncommitted = false;
    for(SolrDocPojo doc: docList){
      inputList.add(doc.converToSolrDoc());
      if(inputList.size() == SOLR_BATCH_SIZE){
        uncommitted = !sendToSolr(inputList, commit);
        inputList.clear();
        commit = !commit;
        System.out.println("sendToSolr executed");
      }
    }

    // Previously the final commit happened only when a partial batch was left
    // over, so a run ending on a full, non-committing batch was never
    // committed by this thread. Commit whenever anything is still pending.
    if(!inputList.isEmpty() || uncommitted){
      sendToSolr(inputList, true);
      inputList.clear();
    }

    System.out.println("done");
  }

  /**
   * Adds the given documents to Solr and optionally commits. Failures are
   * reported on standard error and do not stop the import.
   *
   * @param docList documents to add; when empty, no add request is issued
   * @param commit  whether to issue a commit after the add
   * @return {@code true} only if a commit was requested and completed
   *         without an exception
   */
  private boolean sendToSolr(List<SolrInputDocument> docList, boolean commit) {
    try {
      if(!docList.isEmpty()){
        SolrEndPoint.client.add(docList);
      }
      if(commit){
        SolrEndPoint.client.commit();
        return true;
      }
    } catch (SolrServerException | IOException e) {
      System.err.println("Failed to send " + docList.size()
          + " document(s) to Solr (commit=" + commit + ")");
      e.printStackTrace();
    }
    return false;
  }
}

US.java
This enum class defines all US state names. Except for the 'randomState' method, this code is taken from https://github.com/AustinC/UnitedStates/blob/master/src/main/java/unitedstates/US.java

import java.util.Arrays;
import java.util.List;
import java.util.Random;

/**
 * Enumeration of US states and territories. Each constant is declared with
 * three strings; judging by the values these are a display name, a
 * two-letter abbreviation and a "US-XX" style code (the constructor is not
 * shown here, so the exact meaning should be confirmed against it).
 */
public enum US {
  ALABAMA("Alabama","AL","US-AL"),
  ALASKA("Alaska","AK","US-AK"),
  ARIZONA("Arizona","AZ","US-AZ"),
  ARKANSAS("Arkansas","AR","US-AR"),
  CALIFORNIA("California","CA","US-CA"),
  COLORADO("Colorado","CO","US-CO"),
  CONNECTICUT("Connecticut","CT","US-CT"),

  //Omitted (the constants between CONNECTICUT and WYOMING are not shown)

  WYOMING("Wyoming","WY","US-WY"),
  PUERTO_RICO("Puerto Rico","PR","US-PR");

  // Fixed-size list view over all constants, built once so randomState()
  // does not call values() on every invocation.
  private static final List<US> VALUES = Arrays.asList(values());
  // Number of constants; used as the exclusive upper bound for random indices.
  private static final int SIZE = VALUES.size();
  // Shared random source for randomState().
  private static final Random RANDOM = new Random();

  /**
   * Picks one constant of this enum at a random index.
   *
   * @return a randomly selected constant
   */
  public static US randomState()  {
    return VALUES.get(RANDOM.nextInt(SIZE));
  }
  
  //Omitted (constructor and accessors are not shown)
}

SolrEndPoint.java
import org.apache.solr.client.solrj.impl.HttpSolrClient;

/**
 * Holds the single SolrJ HTTP client used by the import code.
 */
public class SolrEndPoint {
  /** Base URL of the Solr core that receives the documents. */
  private static final String CORE_URL = "http://localhost:8983/solr/schemaless";

  /** Client instance built once when this class is initialised. */
  static final HttpSolrClient client =
      new HttpSolrClient.Builder(CORE_URL).build();
}


No comments:

Post a Comment

Java 9: Flow - Reactive Programming

Programming world has always been changed fast enough and many programming / design paradigms have been introduced such as object oriented p...