You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 6 Next »

– This document is a work in progress.

Overview

Templeton provides a REST-like web API for HCatalog and related Hadoop components. Developers can make HTTP requests to the Templeton web server to execute HCatalog DDL commands. With the REST APIs in place for HCatalog DDL commands, it is desirable to have Java APIs in HCatalog that let end users execute DDL commands without using the CLI.

Design

New Classes:

HCatClient

HCatClient is an interface containing the APIs for all the permitted HCatalog DDL commands.

 package org.apache.hcatalog.api;

import java.util.List;

import org.apache.hcatalog.common.HCatException;
import org.apache.hcatalog.mapreduce.HCatDatabaseInfo;
import org.apache.hcatalog.mapreduce.HCatPartitionInfo;
import org.apache.hcatalog.mapreduce.HCatTableInfo;

/**
 * The Interface HCatClient containing APIs for HCatalog DDL commands.
 */
public interface HCatClient {

    /**
     * Gets the database like.
     *
     * @param regex The regular expression. Providing "*" would retrieve all the names
     *              of the databases.
     * @return The list of all the database names.
     * @throws HCatException
     */
    public List<String> getDatabaseLike(String regex) throws HCatException;

    /**
     * Gets the database.
     *
     * @param dbName The name of the database.
     * @return An instance of HCatDatabaseInfo.
     * @throws HCatException
     */
    public HCatDatabaseInfo getDatabase(String dbName) throws HCatException;

    /**
     * Creates the database.
     *
     * @param dbInfo An instance of HCatCreateDBDesc.
     * @return true, if successful
     * @throws HCatException
     */
    public boolean createDatabase(HCatCreateDBDesc dbInfo)
            throws HCatException;

    /**
     * Deletes a database.
     *
     * @param dbName The name of the database to delete.
     * @param ifExists Hive returns an error if the database specified does not exist,
     *                 unless ifExists is set to true.
     * @param mode This is set to either "restrict" or "cascade". Restrict will
     *             remove the schema if all the tables are empty. Cascade removes
     *             everything including data and definitions.
     * @param userGroup The user group to use
     * @param permissions The permissions string to use. The format is "rwxrw-r-x".
     * @return true, if successful
     * @throws HCatException
     */
    public boolean deleteDatabase(String dbName, boolean ifExists, String mode,
            String userGroup, String permissions) throws HCatException;

    /**
     * Gets the tables like a pattern specified.
     *
     * @param dbName The name of the database.
     * @param regex The regular expression. Providing "*" would retrieve all the names
     *              of  the table.
     * @return A list of all table names matching the specified pattern.
     * @throws HCatException
     */
    public List<String> getTablesLike(String dbName, String regex)
            throws HCatException;

    /**
     * Gets the table.
     *
     * @param dbName The name of the database.
     * @param tableName The name of the table.
     * @return An instance of HCatTableInfo.
     * @throws HCatException
     */
    public HCatTableInfo getTable(String dbName, String tableName)
            throws HCatException;

    /**
     * Creates the table.
     *
     * @param createTableDesc An instance of HCatCreateTableDesc class.
     * @return true, if successful.
     * @throws HCatException the h cat exception
     */
    public boolean createTable(HCatCreateTableDesc createTableDesc)
            throws HCatException;

    /**
     * Creates the table like an existing table.
     *
     * @param dbName The name of the database.
     * @param existingTblName The name of the existing table.
     * @param newTableName The name of the new table.
     * @param ifExists the if exists
     * @param isExternal Set to "true", if table has be created at a different
     *                   location other than default.
     * @param location The location for the table.
     * @return true, if successful
     * @throws HCatException
     */
    public boolean createTableLike(String dbName, String existingTblName,
            String newTableName, boolean ifExists, boolean isExternal,
            String location) throws HCatException;

    /**
     * Delete a table.
     *
     * @param dbName The name of the database.
     * @param tableName The name of the table.
     * @param ifExists Hive returns an error if the database specified does not exist,
     *                 unless ifExists is set to true.
     * @param userGroup The user group to use.
     * @param permissions The permissions string to use. The format is "rwxrw-r-x".
     * @return true, if successful
     * @throws HCatException
     */
    public boolean deleteTable(String dbName, String tableName,
            boolean ifExists, String userGroup, String permissions)
            throws HCatException;

    /**
     * Renames a table.
     *
     * @param dbName The name of the database.
     * @param oldName The name of the table to be renamed.
     * @param newName The new name of the table.
     * @param userGroup The user group to use.
     * @param permissions The permissions string to use. The format is "rwxrw-r-x".
     * @return true, if successful
     * @throws HCatException
     */
    public boolean renameTable(String dbName, String oldName, String newName,
            String userGroup, String permissions) throws HCatException;

    /**
     * Gets all the partitions.
     *
     * @param dbName The name of the database.
     * @param tblName The name of the table.
     * @return A list of partition names.
     * @throws HCatException the h cat exception
     */
    public List<HCatPartitionInfo> getPartitions(String dbName, String tblName)
            throws HCatException;

    /**
     * Gets the partition.
     *
     * @param dbName The database name.
     * @param tableName The table name.
     * @param partitionName The partition name, Comma separated list of col_name='value'.
     * @return An instance of HCatPartitionInfo.
     * @throws HCatException
     */
    public HCatPartitionInfo getPartition(String dbName, String tableName,
            String partitionName) throws HCatException;

    /**
     * Adds the partition.
     *
     * @param partInfo An instance of HCatAddPartitionDesc.
     * @return true, if successful
     * @throws HCatException the h cat exception
     */
    public boolean addPartition(HCatAddPartitionDesc partInfo) throws HCatException;

    /**
     * Deletes partition.
     *
     * @param dbName The database name.
     * @param tableName The table name.
     * @param partitionName The partition name, Comma separated list of col_name='value'.
     * @param ifExists Hive returns an error if the partition specified does not exist, unless ifExists is set to true.
     * @param userGroup The user group to use.
     * @param permissions The permissions string to use. The format is "rwxrw-r-x".
     * @return true, if successful
     * @throws HCatException
     */
    public boolean deletePartition(String dbName, String tableName,
            String partitionName, boolean ifExists, String userGroup,
            String permissions) throws HCatException;

    /**
     * List partitions by filter.
     *
     * @param dbName The database name.
     * @param tblName The table name.
     * @param filter The filter string,
     *    for example "part1 = \"p1_abc\" and part2 <= "\p2_test\"". Filtering can
     *    be done only on string partition keys.
     * @return list of partitions
     * @throws HCatException the h cat exception
     */
    public List<HCatPartitionInfo> listPartitionsByFilter(String dbName, String tblName,
            String filter) throws HCatException;

}
HCatTempletonClient

This class implements HCatClient interface.

HCatTempletonDriver

This class implements Hive's CommandProcessor interface.

/**
 * The command processor contract that HCatTempletonDriver implements.
 */
public interface CommandProcessor {

  /** Initialization hook; what it prepares is up to the implementation. */
  void init();

  /**
   * Runs a single command and reports its outcome.
   *
   * @param command the command to run
   * @return the response produced for the command
   * @throws CommandNeedRetryException
   */
  CommandProcessorResponse run(String command) throws CommandNeedRetryException;
}

The "run" method will consume the curl command as an input parameter and return the response.

HCatCommandDesc

This is an abstract class that helps in validating user input, building valid command descriptors and queries.

/**
 * The Class HCatCommandDesc contains methods which help in validating,
 * building command descriptors and queries.
 */
public abstract class HCatCommandDesc{

    public abstract void validateCommandDesc() throws HCatException;
    abstract String buildQuery() throws HCatException;
    abstract boolean isValidationComplete();

}
HCatCreateDBDesc

This class is a subclass of HCatCommandDesc and will be used by users to create a descriptor and validate it for the "create database" command.

/**
 * Command descriptor holding the settings of a database to be created:
 * its name, location, comment and properties.
 */
public class HCatCreateDBDesc extends HCatCommandDesc {

    private String databaseName;
    private String locationUri;
    private String comment;
    private Map<String, String> dbProperties;

    // ---- name ----------------------------------------------------------

    /**
     * Sets the name.
     *
     * @param databaseName the new database name
     */
    public void setName(String databaseName) {
        this.databaseName = databaseName;
    }

    /**
     * @return the database name, or null if none has been set
     */
    String getName() {
        return databaseName;
    }

    // ---- location ------------------------------------------------------

    /**
     * Sets the location.
     *
     * @param location the new location
     */
    public void setLocation(String location) {
        locationUri = location;
    }

    /**
     * @return the location, or null if none has been set
     */
    String getLocation() {
        return locationUri;
    }

    // ---- comment -------------------------------------------------------

    /**
     * Sets the comment.
     *
     * @param comment the new comment
     */
    public void setComment(String comment) {
        this.comment = comment;
    }

    /**
     * @return the comment, or null if none has been set
     */
    String getComment() {
        return comment;
    }

    // ---- properties ----------------------------------------------------

    /**
     * Sets the database properties. The map is stored as given, not copied.
     *
     * @param dbProps the database properties
     */
    public void setDatabaseProperties(Map<String, String> dbProps) {
        dbProperties = dbProps;
    }

    /**
     * @return the database properties map last passed to the setter, or null
     *         if none has been set
     */
    Map<String, String> getDatabaseProperties() {
        return dbProperties;
    }

    // ---- HCatCommandDesc -----------------------------------------------

    /**
     * Validation hook. Not implemented yet: performs no checks.
     *
     * @throws HCatException
     */
    @Override
    public void validateCommandDesc() throws HCatException {
    }

    /**
     * Query construction hook. Not implemented yet.
     *
     * @return always null for now
     * @throws HCatException
     */
    @Override
    String buildQuery() throws HCatException {
        // TODO: build the query for the "create database" command.
        return null;
    }

    /**
     * Validation state hook. Not implemented yet.
     *
     * @return always false for now
     */
    @Override
    boolean isValidationComplete() {
        // TODO: report the real validation state.
        return false;
    }

}
HCatCreateTableDesc

This class is a subclass of HCatCommandDesc and will be used by users to create a descriptor and validate it for the "create table" command.

public class HCatCreateTableDesc extends HCatCommandDesc{

    private String tableName;
    private boolean isExternal;
    private ArrayList<HCatFieldSchema> cols;
    private ArrayList<HCatFieldSchema> partCols;
    private ArrayList<String> bucketCols;
    private ArrayList<Order> sortCols;
    private int numBuckets;
    private String dbName;
    private String comment;
    private String fileFormat;
    private String location;
    private String storageHandler;
    private Map<String, String> tblProps;
    private boolean ifNotExists;

    ArrayList<HCatFieldSchema> getColsString() {
      return this.getCols();
    }

    boolean getIfNotExists() {
        return this.ifNotExists;
    }

    /**
     * Sets the if not exists.
     * If true, the user will not receive an error if the table already exists.
     * @param ifNotExists the new if not exists
     */
    public void setIfNotExists(boolean ifNotExists) {
      this.ifNotExists = ifNotExists;
    }

   String getTableName() {
        return this.tableName;
    }

    String getDatabaseName(){
        return this.dbName;
    }

    /**
     * Sets the database name.
     *
     * @param dbName the new database name
     */
    public void setDatabaseName(String dbName){
        this.dbName = dbName;
    }

    /**
     * Sets the table name.
     *
     * @param tableName the new table name
     */
    public void setTableName(String tableName) {
      this.tableName = tableName;
    }

    ArrayList<HCatFieldSchema> getCols() {
       // ArrayList<FieldSchema> cols = this.tableDesc.getCols();
        return null;
    }

    /**
     * Sets the table columns.
     *
     * @param cols List of columns.
     */
    public void setCols(ArrayList<HCatFieldSchema> cols) {
        //convert and set.
      this.cols = null;
    }

    ArrayList<HCatFieldSchema> getPartCols() {
        return null;
    }

    /**
     * Sets the part cols.
     *
     * @param partCols List of partition columns.
     */
    public void setPartCols(ArrayList<HCatFieldSchema> partCols) {
      //this.partCols = partCols;
    }

    ArrayList<String> getBucketCols() {
        return this.bucketCols;
    }

    /**
     * Sets the bucket cols.
     *
     * @param bucketCols The list of columns to be used for clustering.
     */
    public void setBucketCols(ArrayList<String> bucketCols) {
      this.bucketCols = bucketCols;
    }

    int getNumBuckets() {
        return this.numBuckets;
    }

    /**
     * Sets the num buckets.
     *
     * @param numBuckets The number of buckets.
     */
    public void setNumBuckets(int numBuckets) {
      this.numBuckets = numBuckets;
    }

    String getComment() {
        return this.comment;
    }

    /**
     * Sets the comment.
     *
     * @param comment The comment for the table.
     */
    public void setComment(String comment) {
      this.comment = comment;
    }


    String getStorageHandler() {
        return this.storageHandler;
    }

    /**
     * Sets the storage handler.
     *
     * @param storageHandler the new storage handler
     */
    public void setStorageHandler(String storageHandler) {
      this.storageHandler = storageHandler;
    }

    String getLocation() {
        return this.location;
    }

    /**
     * Sets the location.
     *
     * @param location the new location
     */
    public void setLocation(String location) {
      this.location = location;
    }

    boolean getExternal() {
        return this.isExternal;
    }

    /**
     * Sets the external.
     *
     * @param isExternal True/False, indicating if the table is an external table.
     */
    public void setExternal(boolean isExternal) {
      this.isExternal = isExternal;
    }

    ArrayList<Order> getSortCols() {
        return this.sortCols;
    }

    /**
     * Sets the sort cols.
     *
     * @param sortCols the sortCols to set
     */
    public void setSortCols(ArrayList<Order> sortCols) {
      this.sortCols = sortCols;
    }

    /**
     * @return the table properties
     */
    Map<String, String> getTblProps() {
        return this.tblProps;
    }

    /**
     * @param tblProps
     *          the table properties to set
     */
    public void setTblProps(Map<String, String> tblProps) {
      this.tblProps = tblProps;
    }

    /**
     * Sets the file format.
     *
     * @param format the new file format
     */
    public void setFileFormat(String format){
        this.fileFormat = format;
    }

    String getFileFormat(){
        return this.fileFormat;
    }

    /* @return
    /* @throws HCatException
     * @see org.apache.hcatalog.api.HCatCommandDescBuilder#buildCommandDesc()
     */
    @Override
    public void validateCommandDesc() throws HCatException {
        // TODO Auto-generated method stub
    }


    /* @param desc
    /* @return
    /* @throws HCatException
     * @see org.apache.hcatalog.api.HCatCommandDescBuilder#buildQuery(org.apache.hcatalog.api.HCatCommandDescBuilder)
     */
    @Override
    String buildQuery() throws HCatException {
        // TODO Auto-generated method stub
        return null;
    }


    /* @return
     * @see org.apache.hcatalog.api.HCatCommandDesc#isValidationComplete()
     */
    @Override
    boolean isValidationComplete() {
        // TODO Auto-generated method stub
        return false;
    }

HCatAddPartitionDesc

This class is a subclass of HCatCommandDesc and will be used by users to create a descriptor and validate it for the "add partition" command.

/**
 * Command descriptor holding the settings of a partition to be added: the
 * target database and table, the partition location and the partition
 * specification (column name to value, in insertion order).
 */
public class HCatAddPartitionDesc extends HCatCommandDesc {

    //private AddPartitionDesc addPartDesc;
    private String dbName;
    private String tableName;
    private String location;
    // Initialised eagerly: addPartSpec() writes into this map, and it used to
    // throw a NullPointerException because the field was never assigned.
    private LinkedHashMap<String, String> partSpec =
            new LinkedHashMap<String, String>();

    /**
     * @return database name
     */
    String getDbName() {
        return this.dbName;
    }

    /**
     * Sets the db name.
     *
     * @param dbName database name
     */
    public void setDbName(String dbName) {
        this.dbName = dbName;
    }

    /**
     * @return the table we're going to add the partitions to.
     */
    String getTableName() {
        return this.tableName;
    }

    /**
     * Sets the table name.
     *
     * @param tableName the table we're going to add the partitions to.
     */
    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    /**
     * @return the location of the partition
     */
    String getLocation() {
        return this.location;
    }

    /**
     * Sets the location.
     *
     * @param location The location of partition in relation to table
     */
    public void setLocation(String location) {
        this.location = location;
    }

    /**
     * @return partition specification; empty until {@link #addPartSpec} has
     *         been called. This is the internal map, not a copy.
     */
    LinkedHashMap<String, String> getPartSpec() {
        return this.partSpec;
    }

    /**
     * Adds the partition name and value. Adding the same column name again
     * replaces its previous value.
     *
     * @param colName The column name.
     * @param value The value.
     */
    public void addPartSpec(String colName, String value) {
        this.partSpec.put(colName, value);
    }

    /**
     * Validation hook. Not implemented yet: performs no checks.
     *
     * @throws HCatException
     */
    @Override
    public void validateCommandDesc() throws HCatException {
    }

    /**
     * Query construction hook. Not implemented yet.
     *
     * @return always null for now
     * @throws HCatException
     */
    @Override
    String buildQuery() throws HCatException {
        // TODO: build the query for the "add partition" command.
        return null;
    }

    /**
     * Validation state hook. Not implemented yet.
     *
     * @return always false for now
     */
    @Override
    boolean isValidationComplete() {
        // TODO: report the real validation state.
        return false;
    }

}
HCatDBInfo

Modification to Existing Classes:

HCatTableInfo
PartInfo

Usage


 // Load the Hive settings. Assuming this is Hadoop's Configuration, extra
 // resources are registered with addResource(...); it has no add(...) method.
 Configuration config = new Configuration();
 config.addResource("hive-site.xml");
 // Hold the client through the HCatClient interface it implements.
 HCatClient client = new HCatTempletonClient(config);
 
 // Describe the table to create.
 HCatCreateTableDesc desc = new HCatCreateTableDesc();
 desc.setTableName("demo_table");
 desc.setDatabaseName("db1");
 desc.setFileFormat("rcfile");
 ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
 cols.add(new HCatFieldSchema("col1", Type.INT, "comment1"));
 cols.add(new HCatFieldSchema("col2", Type.STRING, "comment2"));
 desc.setCols(cols);
 
 //Validate
 desc.validateCommandDesc();
 boolean success = client.createTable(desc);

Concerns

  • No labels