Monday, July 24, 2017

Solr 6: Adding Documents using a SolrJ Client Application

I created a SolrJ client application to populate the documents shown in a previous post about Solr schemaless.  This application requires the jar files shipped with Solr for SolrJ clients: solr-solrj-6.6.0.jar in the <SolrInstalled>/dist directory and all jar files under the <SolrInstalled>/dist/solrj-lib directory.

UserCreateMain.java

import java.util.ArrayList;
import java.util.List;

public class UserCreateMain {

  public static void main(String[] args) {
    int totalDoc = 3_000_000;
    int docPerThread = 1_000_000; // each SolrImport thread indexes one million documents

    List<SolrDocPojo> docList = new ArrayList<>();
    for (int numOfDoc = 1; numOfDoc <= totalDoc; numOfDoc++) {
      UserDoc user = new UserDoc();
      user.populateFields();
      docList.add(user);

      // Hand the accumulated documents off to a new import thread.
      if (numOfDoc % docPerThread == 0) {
        (new SolrImport(docList)).start();
        docList = new ArrayList<>();
      }
    }

    // Import any remainder that didn't fill a whole batch.
    if (docList.size() > 0) {
      (new SolrImport(docList)).start();
    }
  }
}

SolrDocPojo.java
import org.apache.solr.common.SolrInputDocument;

public interface SolrDocPojo {
  public SolrInputDocument convertToSolrDoc();
  public void populateFields();
}

UserDoc.java
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.UUID;

import org.apache.solr.common.SolrInputDocument;

public class UserDoc implements SolrDocPojo {
  private String id;
  private String firstName;
  private String lastName;
  private Integer birthYear;
  private String companyName;
  private String state;
  private Set<String> permission = new HashSet<>();
  
  private static final Random RANDOM = new Random();
  private static final String[] COMPANIES = {"Google", "FB", "Samsung", "Intel", "Netflex", 
      "Micro", "Zions", "OC Tanner", "GE", "Goldman", "Aegen", "GlaxoSmithKline", "Ford"};
  
  public UserDoc() {
    super();
    id = UUID.randomUUID().toString();
  }


  @Override
  public SolrInputDocument convertToSolrDoc() {
    SolrInputDocument solrDoc = new SolrInputDocument();
    solrDoc.setField("id", id);
    solrDoc.setField("firstName", firstName);
    solrDoc.setField("lastName", lastName);
    solrDoc.setField("birthYear", birthYear);
    solrDoc.setField("companyName", companyName);
    solrDoc.setField("state", state);
    solrDoc.setField("permission", permission);
    return solrDoc;
  }

  @Override
  public void populateFields() {
    state = US.randomState().getANSIAbbreviation();
    birthYear = 1930 + RANDOM.nextInt(80);
    companyName = COMPANIES[RANDOM.nextInt(COMPANIES.length)];

    int firstSeparator = id.indexOf('-');
    firstName = "first" + id.substring(0, firstSeparator);
    lastName = "last" + id.substring(firstSeparator+1, id.indexOf('-', firstSeparator+1));
    
    int numOfPermission = RANDOM.nextInt(11); //0~10
    for(int i=0; i<numOfPermission; i++){ //max 10 permissions
      permission.add("permission"+RANDOM.nextInt(10));
    }
  }

  //Getters and Setters are omitted
}

SolrImport.java
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;


public class SolrImport extends Thread {
  final int SOLR_BATCH_SIZE = 2000;

  List<SolrDocPojo> docList = null;

  public SolrImport(List<SolrDocPojo> docList) {
    super();
    this.docList = docList;
  }

  public void run(){
    List<SolrInputDocument> inputList =
        new ArrayList<SolrInputDocument>();

    boolean commit = false;
    for (SolrDocPojo doc : docList) {
      inputList.add(doc.convertToSolrDoc());
      if (inputList.size() % SOLR_BATCH_SIZE == 0) {
        // Commit on every other batch to cut the number of commits in half.
        sendToSolr(inputList, commit);
        inputList.clear();
        commit = !commit;
        System.out.println("sendToSolr executed");
      }
    }

    if(inputList.size() > 0){
      sendToSolr(inputList, true);
      inputList.clear();
    }

    System.out.println("done");
  }

  private void sendToSolr(List<SolrInputDocument> docList, boolean commit) {
    try {
      SolrEndPoint.client.add(docList);
      if(commit)
        SolrEndPoint.client.commit();

    } catch (SolrServerException e) {
      e.printStackTrace();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
}

US.java
This enum class defines all US state names. Except for the 'randomState' method, this code is from https://github.com/AustinC/UnitedStates/blob/master/src/main/java/unitedstates/US.java

import java.util.Arrays;
import java.util.List;
import java.util.Random;

public enum US {
  ALABAMA("Alabama","AL","US-AL"),
  ALASKA("Alaska","AK","US-AK"),
  ARIZONA("Arizona","AZ","US-AZ"),
  ARKANSAS("Arkansas","AR","US-AR"),
  CALIFORNIA("California","CA","US-CA"),
  COLORADO("Colorado","CO","US-CO"),
  CONNECTICUT("Connecticut","CT","US-CT"),

  //Omitted

  WYOMING("Wyoming","WY","US-WY"),
  PUERTO_RICO("Puerto Rico","PR","US-PR");

  private static final List<US> VALUES = Arrays.asList(values());
  private static final int SIZE = VALUES.size();
  private static final Random RANDOM = new Random();

  public static US randomState()  {
    return VALUES.get(RANDOM.nextInt(SIZE));
  }
  
  //Omitted
}

SolrEndPoint.java
import org.apache.solr.client.solrj.impl.HttpSolrClient;

public class SolrEndPoint {
  static final HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/schemaless").build();  
}
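
To verify the import, a quick document count can be run with the same client.  This is a minimal sketch under the assumptions of this post (the 'schemaless' core and the SolrEndPoint class above are in place); the VerifyImportMain class name is mine.

VerifyImportMain.java
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;

public class VerifyImportMain {
  public static void main(String[] args) throws Exception {
    // Match all documents but fetch none; we only want the total count.
    SolrQuery query = new SolrQuery("*:*");
    query.setRows(0);
    QueryResponse rsp = SolrEndPoint.client.query(query);
    System.out.println("numFound: " + rsp.getResults().getNumFound());
    SolrEndPoint.client.close();
  }
}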


Solr 6: Schemaless and Schema Mode

The Solr schema is where we tell Solr how it should build indexes from input documents.  Since Solr 5, the default is schemaless mode, which allows users and developers to construct a schema without having to manually edit the schema file.

In this post, I will talk about how to create a core in schemaless mode, add documents, and show a few differences between schema mode and schemaless mode in a stand-alone Solr.

Creating a Core
This command starts Solr in the foreground (-f) with a 1 GB heap (-m 1g).
   bin/solr start -f -m 1g

Now, let's create a core named 'schemaless'.
slkc:solr-6.6.0 jihwan$ pwd
/Users/jihwan/devTools/solr-6.6.0
slkc:solr-6.6.0 jihwan$ bin/solr create -c schemaless

Copying configuration to new core instance directory:
/Users/jihwan/devTools/solr-6.6.0/server/solr/schemaless

Creating new core 'schemaless' using command:
http://localhost:8983/solr/admin/cores?action=CREATE&name=schemaless&instanceDir=schemaless

{
  "responseHeader":{
    "status":0,
    "QTime":1652},
  "core":"schemaless"}

slkc:solr-6.6.0 jihwan$ 

The schemaless core directory contains several configuration files under its 'conf' directory.  By default, this core is in schemaless mode, and a managed-schema file and a solrconfig.xml file are created.  The created solrconfig.xml doesn't declare a <schemaFactory> element, so it uses a ManagedIndexSchemaFactory for schemaless mode by default.  In this case, the managed-schema file is used.

Initial Fields in the managed-schema
Right after the core is created, this managed-schema file contains only four <field> elements and one <copyField> element, along with many other elements.

<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<!-- doc values are enabled by default for primitive types such as long so we don't index the version field  -->
<field name="_version_" type="long" indexed="false" stored="false"/>
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>

<!-- Only enabled in the "schemaless" data-driven example (assuming the client
     does not know what fields may be searched) because it's very expensive to index everything twice. -->
<copyField source="*" dest="_text_"/>

When other fields are used in a document, their field types are determined based on the Java classes of the field values, and the fields are added to the schema.
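
As a quick illustration of this guessing, add a single document with fields the core has never seen.  This is a minimal sketch under the assumptions of this post (the 'schemaless' core running on localhost:8983); the field names are made up for the example.

TypeGuessDemo.java
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.SolrInputDocument;

public class TypeGuessDemo {
  public static void main(String[] args) throws Exception {
    HttpSolrClient client = new HttpSolrClient.Builder(
        "http://localhost:8983/solr/schemaless").build();
    SolrInputDocument doc = new SolrInputDocument();
    doc.setField("id", "type-guess-demo-1");
    doc.setField("nickName", "solr-fan"); // a String value leads to a string-based field type
    doc.setField("loginCount", 42L);      // a Long value leads to a long-based field type
    client.add(doc);
    client.commit(); // after the commit, both new fields appear in the managed-schema file
    client.close();
  }
}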

Adding Documents
Using the SolrJ client application shown in the next post, three million randomly generated documents are added.  This is the server side log at the beginning of the addition and the last part of the logs.  It took about 2 minutes 9 seconds.

2017-07-24 18:21:22.847 INFO  (qtp1205044462-19) 
   [   x:schemaless] o.a.s.s.ManagedIndexSchema Upgraded to managed schema at /Users/jihwan/devTools/solr-6.6.0/server/solr/schemaless/conf/managed-schema
2017-07-24 18:21:23.536 INFO  (qtp1205044462-19) [   x:schemaless] o.a.s.u.p.LogUpdateProcessorFactory 
   [schemaless]  webapp=/solr path=/update params={wt=javabin&version=2}{add=[dcbb6e1e-3ecc-44c3-a02c-bbc9d0725a63 
    (1573829196224921600), e8392adf-6d50-4030-ac58-eda5b49e5f0b (1573829196279447552), 3807603f-e9ea-43b9-8aa3-4274d001aabf 
    (1573829196281544704), 960167b4-9741-4cea-ace0-ace8874b732b (1573829196282593280), be500d86-177a-4bb9-874d-1bc97c8069ba 
    (1573829196283641856), b14ed131-c088-4675-ac4d-dd40227400dc (1573829196285739008), cfacbdc9-e8d0-4509-993b-f8021c2a7b04 
    (1573829196286787584), 103df904-f61b-4b96-a5d7-1239762a7a12 (1573829196287836160), 18d96163-170b-41b8-85f8-72e36e8bfeef 
    (1573829196289933312), 7c5e491e-f1cf-4d84-a044-57cc1c0ccf6d (1573829196289933313), ... (2000 adds)]} 0 775

2017-07-24 18:23:31.431 INFO  (qtp1205044462-22) [   x:schemaless] o.a.s.u.DirectUpdateHandler2 end_commit_flush
2017-07-24 18:23:31.431 INFO  (qtp1205044462-22) [   x:schemaless] o.a.s.u.p.LogUpdateProcessorFactory 
   [schemaless]  webapp=/solr path=/update params={waitSearcher=true&commit=true&softCommit=false&wt=javabin&version=2}{commit=} 0 114

This is a query result.

After adding the documents, notice that the managed-schema file under the '/schemaless/conf' directory was modified.  Among several modifications, we can find new fields that correspond to the fields we used in the documents.  Solr determined each field and its type, and updated the managed-schema file.

<field name="_root_" type="string" docValues="false" indexed="true" stored="false"/>
<field name="_text_" type="text_general" multiValued="true" indexed="true" stored="false"/>
<field name="_version_" type="long" indexed="false" stored="false"/>
<field name="birthYear" type="tlongs"/>
<field name="companyName" type="strings"/>
<field name="firstName" type="strings"/>
<field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
<field name="lastName" type="strings"/>
<field name="permission" type="strings"/>
<field name="state" type="strings"/>

Using this default configuration, the size of the index directory 'data' is 1.26 GB, with 274 (segmented) files.

Schema Mode
Although schemaless is the default mode and supports features such as modifying a schema through an API without restarting the Solr server, the Solr Reference Guide says, "Schemaless has limitations. It is a bit brute force, and if it guesses wrong, you can't change much about a field without having to reindex", and "the Solr community does not recommend going to production without a schema that you have defined yourself".

Developers can explicitly define the schema and control each field type.
  1. Create a core:  bin/solr create -c schema
    The created core is in schemaless mode.
  2. Stop the server.
  3. Rename 'server/solr/schema/conf/managed-schema' to 'server/solr/schema/conf/schema.xml'.
  4. Open the schema.xml file and replace the existing four fields with these fields:
    <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
    <!-- doc values are enabled by default for primitive types such as long so we don't index the version field  -->
    <field name="_version_" type="long" indexed="false" stored="false"/>
    <field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>
    <field name="firstName" type="string" indexed="true" stored="true" docValues="false" />
    <field name="lastName" type="string" indexed="true" stored="true" docValues="false" />
    <field name="birthYear" type="int" indexed="true" stored="true" docValues="false" />
    <field name="companyName" type="string" indexed="true" stored="true" docValues="false" />
    <field name="state" type="string" indexed="true" stored="true" docValues="false" />
    <field name="permission" type="string" indexed="false" stored="true" multiValued="true" />

    <!-- Only enabled in the "schemaless" data-driven example (assuming the client
         does not know what fields may be searched) because it's very expensive to index everything twice. -->
    <copyField source="*" dest="_text_"/>

  5. I also changed the schema name from 'example-data-driven-schema' to 'user_schema'.
  6. Open the solrconfig.xml and add the schemaFactory element. ClassicIndexSchemaFactory is used for schema mode:
    <schemaFactory class="ClassicIndexSchemaFactory"/>

  7. Remove the 'AddSchemaFieldsUpdateProcessorFactory' processor element under the <updateRequestProcessorChain name="add-unknown-fields-to-the-schema"> element.  This processor dynamically adds fields to the schema when an input document contains one or more fields that don't match any field or dynamic field in the schema, so it must be removed in schema mode.  The updateRequestProcessorChain "add-unknown-fields-to-the-schema" is applied because of an <initParams path="/update/**"> element defined in the solrconfig.xml.

Adding Documents with Schema Mode
The same SolrJ application is used to add 3 million documents.  Based on the server logs, it took 2 minutes 3 seconds.  Compared to the schemaless mode (2 mins 9 secs), the execution time is not much different.  The data directory size was 1.12 GB (compared to 1.26 GB with schemaless).  The size difference is mainly caused by the different field definitions.

This is a query result.

Schemaless with Manually Added Fields
Schemaless mode does not require you to define your own fields manually, but you can.  You create a core with the default schemaless mode.  Then, manually add fields to the managed-schema file before you insert a doc with any of those fields.

I added six fields, which are used in my User document, under the existing fields in the managed-schema file.

<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<!-- doc values are enabled by default for primitive types such as long so we don't index the version field  -->
<field name="_version_" type="long" indexed="false" stored="false"/>
<field name="_root_" type="string" indexed="true" stored="false" docValues="false" />
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>

<field name="firstName" type="string" indexed="true" stored="true" docValues="false" />
<field name="lastName" type="string" indexed="true" stored="true" docValues="false" />
<field name="birthYear" type="int" indexed="true" stored="true" docValues="false" />
<field name="companyName" type="string" indexed="true" stored="true" docValues="false" />
<field name="state" type="string" indexed="true" stored="true" docValues="false" />
<field name="permission" type="strings" indexed="true" stored="true" />

<!-- Only enabled in the "schemaless" data-driven example (assuming the client
     does not know what fields may be searched) because it's very expensive to index everything twice. -->
<copyField source="*" dest="_text_"/>

Then, run the same SolrJ application to index 3 million documents.  Execution time was 2 minutes 3 seconds, with a 'data' directory size of 1.18 GB.  You will also notice that the managed-schema file was not modified after the indexing.

Saturday, July 22, 2017

JavaEE: Microservices with TomEE

Since the term Micro-Web-Services was introduced by Dr. Peter Rodgers in 2005, the concept of smaller, more granular services has gained much attention in software communities.  The software developer and author Martin Fowler stated the following in 2014.

"The microservices architectural style is an approach to developing a single application as a suite of small services, each running in its own process and communicating with lightweight mechanisms, often an HTTP resource API.  These services are built around business capabilities and independently deployed by fully automated deployment machinery"

Each microservice can be deployed, updated, and redeployed independently while keeping the integrity of the whole application.  Java EE frameworks also support the microservices architecture.

In this post, let's see how TomEE helps developers develop and build microservices as a stand-alone executable jar with a minimal footprint.  This is nothing special other than showing how TomEE packs and deploys a microservice.

Microservice Codes

The same asynchronous RESTful API code shown here will be used, with the TomEE Maven plugin defined in the pom.xml on a previous post.

Executable Jar with Everything in It
TomEE provides a Maven plugin goal 'tomee:exec'.  This is the command to build an executable jar.
   mvn clean install tomee:exec

Directory Structure Before Executing the Command

Directory Structure After Executing the Command
The command creates a demo-exec.jar file under the target directory; its size is a little more than 60 MB. (I am using the TomEE Plus version)


This is an executable jar file that contains everything you need to run the microservice RESTful API.

Run
   java -jar target/demo-exec.jar

This is the beginning of the logs when the command is run.

slkc:demo jihwan$ java -jar target/demo-exec.jar 
Extracting tomee to /Users/jihwan/workspace/MyWork/demo/.distribution
Using CATALINA_BASE:   /Users/jihwan/workspace/MyWork/demo/.distribution/apache-tomee
Using CATALINA_HOME:   /Users/jihwan/workspace/MyWork/demo/.distribution/apache-tomee
Using CATALINA_TMPDIR: /Users/jihwan/workspace/MyWork/demo/.distribution/apache-tomee/temp
Using JRE_HOME:        /Library/Java/JavaVirtualMachines/jdk1.8.0_91.jdk/Contents/Home
Using CLASSPATH:       /Users/jihwan/workspace/MyWork/demo/.distribution/apache-tomee/bin/bootstrap.jar:/Users/jihwan/workspace/MyWork/demo/.distribution/apache-tomee/bin/tomcat-juli.jar

When you run the command for the first time, it creates a '.distribution' directory under the directory where you run the command, and then starts your microservice on TomEE.  Under this directory, an 'apache-tomee' directory is created, and the entire TomEE server runs with your microservice.  You can modify your code any time and redeploy it independently.



Running the account service shown on a previous post.

Saturday, July 8, 2017

Solr 6: Solr Eclipse Project and Debugging

Creating a Solr Eclipse Project for Debugging
We can download the Solr source code and unpack it.  On my computer, I downloaded the source code under the Solr (binary) home directory.  This is the directory structure under the Solr 6.6 home directory on my computer.  By the way, Solr 6.6 requires Java 8 or higher.
The build.xml under the 'solr-src' directory contains multiple targets such as "idea" and "eclipse".  I will run an ant command to create an Eclipse project.  You may need to install Apache Ant and Apache Ivy before running the command.
   ant eclipse

After completing the ant command, you can import the Solr project into your Eclipse as an Eclipse project.


Running Solr as a Standalone mode for Debugging
Solr provides a script to help users start Solr.  The -f parameter runs Solr as a foreground process, and the -m parameter defines the min and max heap size.  The -a parameter passes additional JVM parameters.  Since "suspend=y" is used, the start process will wait until a remote debugger attaches (on port 8000, as specified).
bin/solr start -f -m 1g \
    -a "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000" 

slkc:solr-6.6.0 jihwan$ pwd
/Users/jihwan/devTools/solr-6.6.0
slkc:solr-6.6.0 jihwan$ bin/solr start -f -a "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000" -m 1g
Listening for transport dt_socket at address: 8000
2017-07-08 20:39:03.184 INFO  (main) [   ] o.e.j.s.Server jetty-9.3.14.v20161028
2017-07-08 20:39:03.545 INFO  (main) [   ] o.a.s.s.SolrDispatchFilter  ___      _       Welcome to Apache Solr™ version 6.6.0
2017-07-08 20:39:03.545 INFO  (main) [   ] o.a.s.s.SolrDispatchFilter / __| ___| |_ _   Starting in standalone mode on port 8983
2017-07-08 20:39:03.545 INFO  (main) [   ] o.a.s.s.SolrDispatchFilter \__ \/ _ \ | '_|  Install dir: /Users/jihwan/devTools/solr-6.6.0
2017-07-08 20:39:03.558 INFO  (main) [   ] o.a.s.s.SolrDispatchFilter |___/\___/_|_|    Start time: 2017-07-08T20:39:03.546Z
2017-07-08 20:39:03.581 INFO  (main) [   ] o.a.s.c.SolrResourceLoader Using system property solr.solr.home: /Users/jihwan/devTools/solr-6.6.0/server/solr
2017-07-08 20:39:03.587 INFO  (main) [   ] o.a.s.c.SolrXmlConfig Loading container configuration from /Users/jihwan/devTools/solr-6.6.0/server/solr/solr.xml
2017-07-08 20:39:03.870 INFO  (main) [   ] o.a.s.u.UpdateShardHandler Creating UpdateShardHandler HTTP client with params: socketTimeout=600000&connTimeout=60000&retry=true
2017-07-08 20:39:04.229 INFO  (main) [   ] o.a.s.c.CorePropertiesLocator Found 0 core definitions underneath /Users/jihwan/devTools/solr-6.6.0/server/solr
2017-07-08 20:39:04.292 INFO  (main) [   ] o.e.j.s.Server Started @46112ms

As displayed, the SolrDispatchFilter is one of the objects invoked during startup.  So, you can find this class in Eclipse and put a breakpoint at the beginning of its init method.  Enjoy your debugging!


Monday, July 3, 2017

JavaEE: Container Managed Transaction Boundary

In a previous post, I talked about a common transaction problem and a way to prevent it by controlling concurrent access to entity data with a locking mechanism.  In this post, let's talk about container managed transaction boundaries within Java EE.

In a Java EE application, a transaction is a series of actions that must all complete successfully, or else all the changes in each action are backed out.  The transaction can be managed by the Java EE container, or by the application (bean).  Because of its simplicity and smaller amount of code, the container managed transaction is easier to use, and enterprise beans use container managed transactions by default if no transaction demarcation is specified.

In an enterprise bean with container-managed transaction demarcation, the EJB container sets the boundary of the transaction.  Typically, the container begins a transaction immediately before an enterprise bean method starts and commits the transaction just before the method exits.  Let's see what this means.

I will use the same Account Entity shown at the bottom of this post and an AccountService shown on this post.   This is also the same AccountRestEnd class, but let me show it again here.

 1 @Logging
 2 @Path("/account")
 3 public class AccountRestEnd {
 4    @Inject
 5    private Logger logger;
 6    
 7    @Inject
 8    private AccountService accountService;
 9    
10    @POST
11    @Path("/balance")
12    @Produces(MediaType.APPLICATION_JSON)
13    public Response changeBalance(@QueryParam("id") Long acctId,
14          @DefaultValue("0") @QueryParam("amount") Integer amount){
15       Account acct = null;
16       try{
17          acct = accountService.updateBalance(acctId, amount);
18       }catch(Exception ex){
19          return Response.status(Response.Status.CONFLICT).build();
20       }
21       Response rs = Response.ok(acct).build();
22       logger.info("Finished a balance endpoint synchronously.");
23       return rs;
24    }
25 }

This RESTful API endpoint is a stateless class, but it doesn't have a @Stateless annotation, which is what would make the object an enterprise bean; therefore, this class is not an enterprise bean.  When an HTTP request hits this method, the method is not within the boundary of a container managed transaction.  Unlike this class, the AccountService is annotated with @Stateless and is an EJB managed object.

Understanding the Transaction Boundary
Let's put a breakpoint on line 19 and run the same test case described under "Problem with Concurrent Access" in that post.  After the second thread runs, you will see the exception on line 19 of the AccountRestEnd class.  You are able to catch the exception inside your code and can control how to handle it.  As for the transaction boundary, the transaction started right before entering the updateBalance method of the enterprise bean, AccountService, and the commit/rollback occurs after the completion of the method.

For demonstration, let's make the AccountRestEnd class an enterprise bean by adding the @Stateless annotation (after line 2).  It means the container transaction starts before the changeBalance method.  When you run the same test case, the EJBTransactionRolledbackException will NOT be caught on line 19 (or 20, after adding the @Stateless annotation) because the rollback occurs after the method of the EJB managed object, AccountRestEnd.  The client will receive a 500 Internal Server Error from TomEE instead of an error status that developers handle.

Controlling the Scope of a Transaction
A transaction attribute controls the scope of a transaction and can be one of the following: Required, RequiresNew, Mandatory, NotSupported, Supports, or Never.  More description of each attribute can be found in the Java EE 7 tutorial.  Let's use an attribute and see how the transaction boundary changes within an enterprise bean.
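
As an aside, here is a minimal sketch of one of these attributes in use.  It is my own illustrative example, not part of this application, and AuditEntry is a hypothetical entity: a RequiresNew method suspends the caller's transaction and runs in its own, so its work is committed even if the caller later rolls back.

import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;

@Stateless
public class AuditService {
   @PersistenceContext(name="demoDB")
   private EntityManager em;

   // REQUIRES_NEW suspends any caller transaction and starts a new one,
   // so this audit row survives a rollback of the business transaction.
   @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
   public void record(AuditEntry entry) { // AuditEntry: a hypothetical entity
      em.persist(entry);
   }
}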

Our changeBalance method in the AccountRestEnd doesn't perform any database operation by itself, so there is no reason to have a longer transaction boundary, especially when database locking is used.

Now, this AccountRestEnd class is an enterprise bean because of the @Stateless annotation, but the @TransactionAttribute changes the transaction boundary with a NOT_SUPPORTED attribute.  This attribute prevents a container managed transaction from starting before the changeBalance method.

 1 @Logging
 2 @Path("/account")
 3 @Stateless
 4 public class AccountRestEnd {
 5    @Inject
 6    private Logger logger;
 7    
 8    @Inject
 9    private AccountService accountService;
10    
11    @POST
12    @Path("/balance")
13    @Produces(MediaType.APPLICATION_JSON)
14    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
15    public Response changeBalance(@QueryParam("id") Long acctId,
16          @DefaultValue("0") @QueryParam("amount") Integer amount){
17       Account acct = null;
18       try{
19          acct = accountService.updateBalance(acctId, amount);
20       }catch(EJBTransactionRolledbackException ex){
21          return Response.status(Response.Status.CONFLICT).build();
22       }
23       Response rs = Response.ok(acct).build();
24       logger.info("Finished a balance endpoint synchronously.");
25       return rs;
26    }
27 }

If you run the same test case, the transaction rollback is triggered after the updateBalance method, and the changeBalance method of the enterprise bean, AccountRestEnd, can catch the EJBTransactionRolledbackException at line 21.  When you use several breakpoints in this code during the test, the debug stack trace in your IDE can show you the calling process more clearly.

Saturday, July 1, 2017

JavaEE 7: Asynchronous RESTful API with Concurrency Utilities

Different web servers create a different number of (limited) HTTP I/O threads.  Some servers implement single-threaded I/O and support asynchronous HTTP requests.  In real life, HTTP requests may take some time to execute their logic: for example, sending email, running a batch process, and/or performing complex DB operations.  During such a long process, it is inefficient to hold the (limited) HTTP I/O threads until the process ends.  We should try to release the I/O thread and let it handle other HTTP requests.

Although Java SE 5 provides API support for concurrency via the java.util.concurrent package, there was no specific API that allowed enterprise developers to use concurrency utilities in a safe manner within the Java EE container managed thread pools.

Java EE 7 containers are now required to provide four types of managed objects that implement the following interfaces.  Applications look up managed objects via a JNDI lookup or resource injection using @Resource (see the lookup sketch after this list).
  • ManagedExecutorService
  • ManagedScheduledExecutorService
  • ManagedThreadFactory
  • ContextService
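For reference, this is a minimal sketch of the JNDI route, using the container's default executor; "java:comp/DefaultManagedExecutorService" is the standard default resource name defined by Java EE 7.

import javax.enterprise.concurrent.ManagedExecutorService;
import javax.naming.InitialContext;
import javax.naming.NamingException;

public class ExecutorLookup {
   public static ManagedExecutorService defaultExecutor() throws NamingException {
      // The container binds a default executor at this standard JNDI name.
      return (ManagedExecutorService) new InitialContext()
            .lookup("java:comp/DefaultManagedExecutorService");
   }
}
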
In this post, I will use a ManagedExecutorService to create an asynchronous HTTP request endpoint.  This is the AccountService I used in an earlier post, but I added a 1 second thread sleep for demonstration purposes.

@Logging
@Stateless
public class AccountService {
   @PersistenceContext(name="demoDB")
   private EntityManager em;
   
   public Account updateBalance(Long id, Integer amountChange){
      Account acct = em.find(Account.class, id);
      try {
         Thread.sleep(1000); // simulate a long-running operation
      } catch (InterruptedException e) {
      }
      
      acct.setBalance(acct.getBalance() + amountChange);
      //Updating the updateTime field is omitted on purpose 
      return acct;
   }
}

Synchronous API
Again, this is the same web service I used in a previous post.

@Logging
@Path("/account")
public class AccountRestEnd {
   @Inject
   private Logger logger;
   
   @Inject
   private AccountService accountService;
   
   @POST
   @Path("/balance")
   @Produces(MediaType.APPLICATION_JSON)
   public Response changeBalance(@QueryParam("id") Long acctId,
         @DefaultValue("0") @QueryParam("amount") Integer amount){
      Account acct = null;
      try{
         acct = accountService.updateBalance(acctId, amount);
      }catch(Exception ex){
         return Response.status(Response.Status.CONFLICT).build();
      }
      Response rs = Response.ok(acct).build();
      logger.info("Finished a balance endpoint synchronously.");
      return rs;
   }
}

By triggering a POST request http://localhost:8080/demo/account/balance?id=1&amount=1 , we see these logs, which are created by the Logging interceptor explained in a previous post.  Every method call is started and finished in order, and the log shows that all methods are executed on the http-nio-8080-exec-2 thread.
[http-nio-8080-exec-2] demo.interceptor.LoggingInterceptor.logMethod  Entering demo.rest.endpoint.AccountRestEnd - changeBalance
[http-nio-8080-exec-2] demo.interceptor.LoggingInterceptor.logMethod  Entering demo.rest.service.AccountService - updateBalance
[http-nio-8080-exec-2] demo.interceptor.LoggingInterceptor.logMethod  Exiting  demo.rest.service.AccountService - updateBalance Execution Time: 1004ms
[http-nio-8080-exec-2] demo.rest.endpoint.AccountRestEnd.changeBalance Finished a balance endpoint synchronously.
[http-nio-8080-exec-2] demo.interceptor.LoggingInterceptor.logMethod  Exiting  demo.rest.endpoint.AccountRestEnd - changeBalance Execution Time: 1010ms

Asynchronous API
This asynchronous API uses the ManagedExecutorService along with an AsyncResponse.  The asynchronous method needs to be a void method, and the 'execute' method creates an asynchronous process.  To resume the HTTP response, call the 'resume' method of the AsyncResponse object.
@Logging
@Path("/accountasync")
public class AccountAsyncRestEnd {
   @Inject
   private Logger logger;
   
   @Inject
   private AccountService accountService;
   
   @Resource
   ManagedExecutorService mes;
   
   @POST
   @Path("/balance")
   public void changeBalanceAsync(@Suspended AsyncResponse ar, 
         @QueryParam("id") Long acctId,
         @DefaultValue("0") @QueryParam("amount") Integer amount){
      
      mes.execute(new Runnable() {
         @Override
         public void run() {
            Account acct = null;
            try{
               acct = accountService.updateBalance(acctId, amount);
            }catch(Exception ex){
               ar.resume(Response.status(Response.Status.CONFLICT).build());
               return; // avoid resuming the response twice on failure
            }
            Response rs = Response.ok(acct)
                  .type(MediaType.APPLICATION_JSON).build();
            ar.resume(rs);
         }
      });

      logger.info("Finished a balance endpoint Asynchronously.");
   }
}

Let's trigger a POST request http://localhost:8080/demo/accountasync/balance?id=1&amount=1 and see the log again.

[http-nio-8080-exec-38] demo.interceptor.LoggingInterceptor.logMethod  Entering demo.rest.endpoint.AccountAsyncRestEnd - changeBalanceAsync
[http-nio-8080-exec-38] demo.rest.endpoint.AccountAsyncRestEnd.changeBalanceAsync Finished a balance endpoint Asynchronously.
[http-nio-8080-exec-38] demo.interceptor.LoggingInterceptor.logMethod  Exiting  demo.rest.endpoint.AccountAsyncRestEnd - changeBalanceAsync Execution Time: 0ms
[managed-thread-4] demo.interceptor.LoggingInterceptor.logMethod  Entering demo.rest.service.AccountService - updateBalance
[managed-thread-4] demo.interceptor.LoggingInterceptor.logMethod  Exiting  demo.rest.service.AccountService - updateBalance Execution Time: 1005ms

The RESTful API endpoint is executed on the http-nio-8080-exec-38 thread and the changeBalanceAsync method completes immediately:
Exiting  demo.rest.endpoint.AccountAsyncRestEnd - changeBalanceAsync Execution Time: 0ms
The business logic in the updateBalance method is executed on a separate thread, 'managed-thread-4', and the result is returned 1005 ms later in this example.

Programming world has always been changed fast enough and many programming / design paradigms have been introduced such as object oriented p...