Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.
  • Motivation:
    • current way of managing job config via files makes it hard to manage job configurations, persisting it in MySQL would make it easier to manage and also provide various CRUD API interfaces to mange the job configuration.
  • Proposed Change
    • create MySQL table `gobblin_job_config`
    • create REST endpoints for CRUD operations
    • add functionality to detect change in any job config and schedule or trigger job accordingly ( similar to current mechanism based on file change )
    • In future, Gobblin should use JDBI (v3) as ORM instead of plan JDBC based persistence which will become cumbersome as it uses DB heavily for all kinds of metadata storage.
  • New or Changed Public Interfaces:
    • POST /job/{jobName}
    • PUT /job//{jobName}
    • GET /job/{jobName}
    • DELETE /job/{jobName}
  • Migration Plan and Compatibility
    • since this is new functionality, in addition to the current file based job config, user can select either one to manage the job.
  • Rejected Alternatives:
    • use of other REST framework ( preferring to use REST.li instead of dropwizard or any other as its built in framework for other APIs

Design:


  • MySQL

...

  • DDL: ( V1_0_4__job_store.sql )

Code Block
languagesql
firstline1
titlegobblin_job_config



CREATE TABLE gobblin_job_owners
`gobblin_job` (
  `id` int(11) nameNOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL,
  `description` varchar(255) NOTDEFAULT NULL,
  `schedule` varchar(64) emailDEFAULT NULL,
  `is_disabled` tinyint(1) DEFAULT '0',
  varchar`priority` smallint(2556) NOTDEFAULT NULL,
  `configs` text,
 team_name    int(11)               DEFAULT NULL,
    team_email   int(11)               DEFAULT NULL,
    org_name     int(11)               DEFAULT NULL,
    created_date timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_date timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (email)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;

CREATE TABLE gobblin_sync_systems
(
    name         varchar(255 `owner_email` varchar(255) DEFAULT NULL,
  `source_system` varchar(255) DEFAULT NULL,
  `target_system` varchar(255) DEFAULT NULL,
  `job_template` varchar(255) DEFAULT NULL,
  `created_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (`name`),
  KEY `name` (`name`),
  KEY `id` (`id`),
  KEY `gobblin_job_template_fk` (`job_template`),
  KEY `source_system` (`source_system`),
  KEY `target_system` (`target_system`),
  CONSTRAINT `gobblin_job_ibfk_1` FOREIGN KEY (`source_system`) REFERENCES `gobblin_sync_system` (`id`),
  CONSTRAINT `gobblin_job_ibfk_2` FOREIGN KEY (`target_system`) REFERENCES `gobblin_sync_system` (`id`),
  CONSTRAINT `gobblin_job_template_fk` FOREIGN KEY (`job_template`) REFERENCES `gobblin_job_template` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=25 DEFAULT CHARSET=utf8;


CREATE TABLE `gobblin_job_template` (
  `name` varchar(255) NOT NULL DEFAULT '',
  `configs` text NOT NULL,
  PRIMARY KEY (`name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;


CREATE TABLE `gobblin_sync_system` (
  `id` varchar(255) NOT NULL,
  `name` varchar(64) NOT NULL,
  `type`  db_type     varchar(64) NOT NULL DEFAULT '',
  `datacenter` varchar(25564) NOTDEFAULT NULL,
  `region` varchar(64) usersDEFAULT NULL,
  `zone` varchar(64) DEFAULT NULL,
  `users` varchar(2551024) NOTDEFAULT NULL,
  `on_hold`  on_hold     tinyint(1) NOT NULL DEFAULT '0',
  `deprecated` tinyint(1)   NOT NULL DEFAULT '0',
    deprecated  `source_support` tinyint(1)   NOT NULL DEFAULT '0',
  `target_support` tinyint(1) configNOT NULL DEFAULT '0',
  `configs`  text,
    created`created_datedate` timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated`updated_datedate` timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (name`id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;


CREATE TABLE gobblin_jobsjob_owner
(
    idname            int(11varchar(255)      NOT NULL AUTO_INCREMENT,
    name email         varchar(255) NOT NULL,
    descriptionteam_name    varcharint(255)      11)    DEFAULT NULL,
    schedule      varchar(64)           DEFAULT NULL,
    isteam_disabledemail   tinyintint(111)            DEFAULT '0',
    priority      smallint              DEFAULT 100,
    configs       text,
    owner_email   varchar(255) NOT NULL,
    source_system varchar(255) NOT NULL,
org_name      #source_dataset varchar(255int(11)          DEFAULT NULL,
    #target_dataset varchar(255)          DEFAULT NULL,
    target_system varchar(255) NOT NULL,
    created_date  timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_date  timestamp    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (nameemail),
#) ENGINE = InnoDB
  FOREIGNDEFAULT KEY (owner_email) REFERENCES gobblin_job_owners (email),
#     FOREIGN KEY (source_system) REFERENCES gobblin_sync_systems (name),
#     FOREIGN KEY (target_system) REFERENCES gobblin_sync_systems (name),
    INDEX (name),
    INDEX (id)
) AUTO_INCREMENT = 1
  ENGINE = InnoDB
  DEFAULT CHARSET = utf8;


CREATE TABLE gobblin_sync_systems_maintenance
(
    id               int(11)      CHARSET = utf8;




Alternate proposal: 

Code Block
languagesql
//Shirshanka: this will allow us to model job_config as a schema-d thing ... and evolve versions without having to use DDL to perform MySQL schema changes...  also allows to swap out a K-V store for this easily

CREATE TABLE `gobblin_job` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
    sync_system_name`name` varchar(255) NOT NULL,
  `configs` text,
 type             varchar(255) NOT NULL`version` int(11),
    start`created_timedate` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  datetime`updated_date` timestamp    NOT NULL,
 DEFAULT   endCURRENT_timeTIMESTAMP ON        datetime     NOT NULL,
  UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (id`name`),
  KEY `name` FOREIGN KEY (sync_system_name) REFERENCES gobblin_sync_systems (name(`name`),
  KEY `id` (`id`)
) ENGINE = InnoDB
 AUTO_INCREMENT=1 DEFAULT CHARSET = utf8;



  • JOB MGMT. CRUD REST API


  1. Create New job example

Code Block
titlePOST body
METHOD: POST 
URL:	http://localhost:9090/job/PullFromWikipedia

BODY:
{
	"jobName": "PullFromWikipedia",
	"jobDescription": "A getting started example for Gobblin",
	"jobDisabled": false,
	"jobProperties": {
		"job.group": "Wikipedia",
		"source.class": "org.apache.gobblin.example.wikipedia.WikipediaSource",
		"test_remove": "test_remove_value",
		"something": "asdsad"
	}
}

RESPONSE: 201 - created


2. update existing job

Code Block
titlePOST body
POSTMETHOD: PUT 
URL:    http://localhost:9090/job/PullFromWikipedia

BODY:
{
	"jobName    "name": "PullFromWikipedia",
	"jobDescription    "description": "A getting started example for Gobblin - pull some pages from wikipedia",
	"jobDisabled    "disabled": true,
    "schedule": "",
    "priority": false98,
	"jobProperties    "configs": {
		        "job.group": "Wikipedia",
		        "source.class": "org.apache.gobblin.example.wikipedia.WikipediaSource",
		        "test_removeadd": "test_remove_value",
		        "something": "asdsad"
	    },
	"_jobPropertiesToRemove": [ 
		"key" 
	]
}

    "ownerEmail": "jsenjaliya@paypal.com",
    "sourceSystem": "simba",
    "targetSystem": "RADD-LVS",
    "configsToRemove": [
        "test_remove"
    ]
}

RESPONSE: 
200 - OK



3. GET existing job

Code Block
titleGET Result
GET httpMETHOD: GET 
URL: 	http://localhost:9090/job/PullFromWikipedia

BODY: None

RESPONSE: 
{
    "jobNameschedule": "PullFromWikipediarun_once",
    "jobIdconfigs": 14,{
        "jobPropertiesjob.group": {"Wikipedia",
        "source.class": "org.apache.gobblin.example.wikipedia.WikipediaSource",
        "test_removesomething": "test_remove_valueasdsad",
        "job.grouptest_add": "Wikipediatest_remove_value",
    },
    "somethingcreatedDate": "asdsad"1587770060000,
     }"sourceSystem": "simba",
    "jobDisabledname": false"PullFromWikipedia",
    "jobDescriptiondescription": "A getting started example for Gobblin - pull some pages from wikipedia",
    "disabled": true,
    "id": 1,
    "updatedDate": 1587774853000,
    "created_date"priority": 98,
    "targetSystem": 1575937161000"RADD-LVS",
    "updated_dateownerEmail": 1575937161000"jsenjaliya@paypal.com"
}