...
```sh
# Build the Flink job server (default job server for PortableRunner) that stores the container locally.
./gradlew :beam-runners-:flink_2.11-:1.5:job-server-:container:docker

# Build portable SDK Harness which builds and stores the container locally.
./gradlew :beam-sdks-:python-:container:docker

# Run the pipeline.
python -m apache_beam.examples.wordcount \
  --runner PortableRunner \
  --input <local input file> \
  --output <local output file>
```
...
```sh
# Build portable worker.
./gradlew :beam-runners-:google-cloud-dataflow-java-fn-api-:worker:build -x spotlessJava -x rat -x test
./gradlew :beam-runners-:google-cloud-dataflow-java-fn-api-:worker:shadowJar

# Build portable Python SDK harness and publish it to GCP.
./gradlew -Pdocker-repository-root=gcr.io/dataflow-build/$USER/beam -p sdks/python/container docker
gcloud docker -- push gcr.io/dataflow-build/$USER/beam/python:latest

# Initialize Python.
cd sdks/python
virtualenv env
. ./env/bin/activate

# Run the pipeline.
python -m apache_beam.examples.wordcount \
  --runner DataflowRunner \
  --num_workers 1 \
  --project <gcp_project_name> \
  --output <gs://path> \
  --temp_location <gs://path> \
  --worker_harness_container_image gcr.io/dataflow-build/$USER/beam/python:latest \
  --experiment beam_fn_api \
  --sdk_location build/apache-beam-2.12.0.dev0.tar.gz \
  --dataflow_worker_jar './runners/google-cloud-dataflow-java/worker/build/libs/beam-runners-google-cloud-dataflow-java-fn-api-worker-2.12.0-SNAPSHOT.jar' \
  --debug
```
...