<html><body>
<style>
body, h1, h2, h3, div, span, p, pre, a {
margin: 0;
padding: 0;
border: 0;
font-weight: inherit;
font-style: inherit;
font-size: 100%;
font-family: inherit;
vertical-align: baseline;
}
body {
font-size: 13px;
padding: 1em;
}
h1 {
font-size: 26px;
margin-bottom: 1em;
}
h2 {
font-size: 24px;
margin-bottom: 1em;
}
h3 {
font-size: 20px;
margin-bottom: 1em;
margin-top: 1em;
}
pre, code {
line-height: 1.5;
font-family: Monaco, 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Lucida Console', monospace;
}
pre {
margin-top: 0.5em;
}
h1, h2, h3, p {
font-family: Arial, sans-serif;
}
h1, h2, h3 {
border-bottom: solid #CCC 1px;
}
.toc_element {
margin-top: 0.5em;
}
.firstline {
margin-left: 2em;
}
.method {
margin-top: 1em;
border: solid 1px #CCC;
padding: 1em;
background: #EEE;
}
.details {
font-weight: bold;
font-size: 14px;
}
</style>
<h1><a href="dataproc_v1beta1.html">Google Cloud Dataproc API</a> . <a href="dataproc_v1beta1.projects.html">projects</a> . <a href="dataproc_v1beta1.projects.jobs.html">jobs</a></h1>
<h2>Instance Methods</h2>
<p class="toc_element">
<code><a href="#cancel">cancel(projectId, jobId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Starts a job cancellation request. To access the job resource after cancellation, call [jobs.list](/dataproc/reference/rest/v1beta1/projects.jobs/list) or [jobs.get](/dataproc/reference/rest/v1beta1/projects.jobs/get).</p>
<p class="toc_element">
<code><a href="#delete">delete(projectId, jobId, x__xgafv=None)</a></code></p>
<p class="firstline">Deletes the job from the project. If the job is active, the delete fails, and the response returns `FAILED_PRECONDITION`.</p>
<p class="toc_element">
<code><a href="#get">get(projectId, jobId, x__xgafv=None)</a></code></p>
<p class="firstline">Gets the resource representation for a job in a project.</p>
<p class="toc_element">
<code><a href="#list">list(projectId, pageSize=None, x__xgafv=None, jobStateMatcher=None, pageToken=None, clusterName=None, filter=None)</a></code></p>
<p class="firstline">Lists jobs in a project.</p>
<p class="toc_element">
<code><a href="#list_next">list_next(previous_request, previous_response)</a></code></p>
<p class="firstline">Retrieves the next page of results.</p>
<p class="toc_element">
<code><a href="#submit">submit(projectId, body, x__xgafv=None)</a></code></p>
<p class="firstline">Submits a job to a cluster.</p>
<h3>Method Details</h3>
<div class="method">
<code class="details" id="cancel">cancel(projectId, jobId, body, x__xgafv=None)</code>
<pre>Starts a job cancellation request. To access the job resource after cancellation, call [jobs.list](/dataproc/reference/rest/v1beta1/projects.jobs/list) or [jobs.get](/dataproc/reference/rest/v1beta1/projects.jobs/get).
Args:
projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
jobId: string, [Required] The job ID. (required)
body: object, The request body. (required)
The object takes the form of:
{ # A request to cancel a job.
}
x__xgafv: string, V1 error format.
Returns:
An object of the form:
{ # A Cloud Dataproc job resource.
"status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
"hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
"jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
"properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
"a_key": "A String",
},
},
"statusHistory": [ # [Output-only] The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
],
"placement": { # Cloud Dataproc job configuration. # [Required] Job information, including how, when, and where to run the job.
"clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
"clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Dataproc service when the job is submitted.
},
"reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
"projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
"jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
},
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
"a_key": "A String",
},
},
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
"driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
"labels": { # [Optional] The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62} Label values must be between 1 and 63 characters long, and must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than 64 labels can be associated with a given job.
"a_key": "A String",
},
"submittedBy": "A String", # [Output-only] The email address of the user submitting the job. For jobs submitted on the cluster, the address is username@hostname.
"driverInputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdin of the job's driver program, only set if the job is interactive.
"driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
"sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class.
"properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"yarnApplications": [ # [Output-only] The collection of YARN applications spun up by this job.
{ # A YARN application created by a job. Application information is a subset of org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto.
"progress": 3.14, # [Required] The numerical progress of the application, from 1 to 100.
"state": "A String", # [Required] The application state.
"name": "A String", # [Required] The application name.
"trackingUrl": "A String", # [Optional] The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
},
],
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
"A String",
],
"properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
"queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
"a_key": "A String",
},
},
"interactive": True or False, # [Optional] If set to `true`, the driver's stdin will be kept open and `driver_input_uri` will be set to provide a path at which additional input can be sent to the driver.
}</pre>
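<p>Example: a minimal sketch of calling this method with the google-api-python-client library. It assumes the library is installed, that Application Default Credentials are available, and that 'my-project' and 'my-job-id' are placeholders for real values.</p>
<pre>from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

# Build a Cloud Dataproc v1beta1 client using Application Default Credentials.
credentials = GoogleCredentials.get_application_default()
dataproc = build('dataproc', 'v1beta1', credentials=credentials)

# Request cancellation. The request body is an empty CancelJobRequest ({}),
# and the response is the Job resource described above.
job = dataproc.projects().jobs().cancel(
    projectId='my-project',
    jobId='my-job-id',
    body={}).execute()

print(job['status']['state'])</pre>
<p>Cancellation is asynchronous; as the description notes, call jobs.get or jobs.list afterwards to observe the job's final state.</p>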
</div>
<div class="method">
<code class="details" id="delete">delete(projectId, jobId, x__xgafv=None)</code>
<pre>Deletes the job from the project. If the job is active, the delete fails, and the response returns `FAILED_PRECONDITION`.
Args:
projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
jobId: string, [Required] The job ID. (required)
x__xgafv: string, V1 error format.
Returns:
An object of the form:
{ # A generic empty message that you can re-use to avoid defining duplicated empty messages in your APIs. A typical example is to use it as the request or the response type of an API method. For instance: service Foo { rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); } The JSON representation for `Empty` is an empty JSON object `{}`.
}</pre>
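<p>Example: a minimal sketch of deleting a finished job, under the same assumptions as the cancel example above (placeholder project and job IDs, Application Default Credentials). Because deleting an active job fails with `FAILED_PRECONDITION`, the call is wrapped in an HttpError handler.</p>
<pre>from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
dataproc = build('dataproc', 'v1beta1', credentials=credentials)

try:
    # A successful delete returns the empty message ({}).
    dataproc.projects().jobs().delete(
        projectId='my-project',
        jobId='my-job-id').execute()
    print('Job deleted.')
except HttpError as e:
    # An active job cannot be deleted; the API rejects the request.
    print('Delete failed: %s' % e)</pre>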
</div>
<div class="method">
<code class="details" id="get">get(projectId, jobId, x__xgafv=None)</code>
<pre>Gets the resource representation for a job in a project.
Args:
projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
jobId: string, [Required] The job ID. (required)
x__xgafv: string, V1 error format.
Returns:
An object of the form:
{ # A Cloud Dataproc job resource.
"status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
"hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
"jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
"properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
"a_key": "A String",
},
},
"statusHistory": [ # [Output-only] The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
],
"placement": { # Cloud Dataproc job configuration. # [Required] Job information, including how, when, and where to run the job.
"clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
"clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Dataproc service when the job is submitted.
},
"reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
"projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
"jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
},
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
"a_key": "A String",
},
},
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
"driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
"labels": { # [Optional] The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62} Label values must be between 1 and 63 characters long, and must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than 64 labels can be associated with a given job.
"a_key": "A String",
},
"submittedBy": "A String", # [Output-only] The email address of the user submitting the job. For jobs submitted on the cluster, the address is username@hostname.
"driverInputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdin of the job's driver program, only set if the job is interactive.
"driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
"sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class.
"properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"yarnApplications": [ # [Output-only] The collection of YARN applications spun up by this job.
{ # A YARN application created by a job. Application information is a subset of org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto.
"progress": 3.14, # [Required] The numerical progress of the application, from 1 to 100.
"state": "A String", # [Required] The application state.
"name": "A String", # [Required] The application name.
"trackingUrl": "A String", # [Optional] The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
},
],
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
"A String",
],
"properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
"queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
"a_key": "A String",
},
},
"interactive": True or False, # [Optional] If set to `true`, the driver's stdin will be kept open and `driver_input_uri` will be set to provide a path at which additional input can be sent to the driver.
}</pre>
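<p>Example: a minimal sketch that polls this method until the job reaches a terminal state, using the same placeholder IDs and credential setup as the examples above. The set of terminal state names is an assumption based on the JobStatus states and may need adjusting.</p>
<pre>import time

from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
dataproc = build('dataproc', 'v1beta1', credentials=credentials)

# Assumed terminal states; check the JobStatus.State enum for the full list.
TERMINAL_STATES = frozenset(['DONE', 'ERROR', 'CANCELLED'])

while True:
    job = dataproc.projects().jobs().get(
        projectId='my-project',
        jobId='my-job-id').execute()
    state = job['status']['state']
    print('Job state: %s' % state)
    if state in TERMINAL_STATES:
        break
    time.sleep(10)</pre>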
</div>
<div class="method">
<code class="details" id="list">list(projectId, pageSize=None, x__xgafv=None, jobStateMatcher=None, pageToken=None, clusterName=None, filter=None)</code>
<pre>Lists jobs in a project.
Args:
projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
pageSize: integer, [Optional] The number of results to return in each response.
x__xgafv: string, V1 error format.
jobStateMatcher: string, [Optional] Specifies enumerated categories of jobs to list.
pageToken: string, [Optional] The page token, returned by a previous call, to request the next page of results.
clusterName: string, [Optional] If set, the returned jobs list includes only jobs that were submitted to the named cluster.
filter: string, [Optional] A filter constraining which jobs to list. Valid filters contain job state and label terms such as: labels.key1 = val1 AND (labels.k2 = val2 OR labels.k3 = val3)
Returns:
An object of the form:
{ # A list of jobs in a project.
"nextPageToken": "A String", # [Optional] This token is included in the response if there are more results to fetch. To fetch additional results, provide this value as the `page_token` in a subsequent ListJobsRequest.
"jobs": [ # [Output-only] Jobs list.
{ # A Cloud Dataproc job resource.
"status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
"hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
"jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
"properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
"a_key": "A String",
},
},
"statusHistory": [ # [Output-only] The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
],
"placement": { # Cloud Dataproc job configuration. # [Required] Job information, including how, when, and where to run the job.
"clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
"clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Dataproc service when the job is submitted.
},
"reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
"projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
"jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
},
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
"a_key": "A String",
},
},
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
"driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
"labels": { # [Optional] The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62} Label values must be between 1 and 63 characters long, and must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than 64 labels can be associated with a given job.
"a_key": "A String",
},
"submittedBy": "A String", # [Output-only] The email address of the user submitting the job. For jobs submitted on the cluster, the address is username@hostname.
"driverInputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdin of the job's driver program, only set if the job is interactive.
"driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
"sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class.
"properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"yarnApplications": [ # [Output-only] The collection of YARN applications spun up by this job.
{ # A YARN application created by a job. Application information is a subset of org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto.
"progress": 3.14, # [Required] The numerical progress of the application, from 1 to 100.
"state": "A String", # [Required] The application state.
"name": "A String", # [Required] The application name.
"trackingUrl": "A String", # [Optional] The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
},
],
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
"A String",
],
"properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
"queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
"a_key": "A String",
},
},
"interactive": True or False, # [Optional] If set to `true`, the driver's stdin will be kept open and `driver_input_uri` will be set to provide a path at which additional input can be sent to the driver.
},
],
}</pre>
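<p>Example: a minimal sketch of listing jobs submitted to one cluster and filtered by a label, under the same assumptions as the earlier examples (placeholder project, cluster, and label values).</p>
<pre>from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
dataproc = build('dataproc', 'v1beta1', credentials=credentials)

# List up to 20 jobs on the named cluster whose label "env" equals "prod".
response = dataproc.projects().jobs().list(
    projectId='my-project',
    clusterName='my-cluster',
    filter='labels.env = prod',
    pageSize=20).execute()

for job in response.get('jobs', []):
    print('%s %s' % (job['reference']['jobId'], job['status']['state']))</pre>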
</div>
<div class="method">
<code class="details" id="list_next">list_next(previous_request, previous_response)</code>
<pre>Retrieves the next page of results.
Args:
previous_request: The request for the previous page. (required)
previous_response: The response from the request for the previous page. (required)
Returns:
A request object that you can call 'execute()' on to request the next
page. Returns None if there are no more items in the collection.
</pre>
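<p>Example: a minimal sketch of paging through every job in a project by chaining list and list_next, under the same assumptions as the examples above. list_next returns None once the last page has been consumed.</p>
<pre>from googleapiclient.discovery import build
from oauth2client.client import GoogleCredentials

credentials = GoogleCredentials.get_application_default()
dataproc = build('dataproc', 'v1beta1', credentials=credentials)

jobs = dataproc.projects().jobs()
request = jobs.list(projectId='my-project', pageSize=100)
while request is not None:
    response = request.execute()
    for job in response.get('jobs', []):
        print(job['reference']['jobId'])
    # Build the request for the next page; None means no more pages.
    request = jobs.list_next(previous_request=request,
                             previous_response=response)</pre>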
</div>
<div class="method">
<code class="details" id="submit">submit(projectId, body, x__xgafv=None)</code>
<pre>Submits a job to a cluster.
Args:
projectId: string, [Required] The ID of the Google Cloud Platform project that the job belongs to. (required)
body: object, The request body. (required)
The object takes the form of:
{ # A request to submit a job.
"job": { # A Cloud Dataproc job resource. # [Required] The job resource.
"status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
"hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
"jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
"properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
"a_key": "A String",
},
},
"statusHistory": [ # [Output-only] The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
],
"placement": { # Cloud Dataproc job configuration. # [Required] Job information, including how, when, and where to run the job.
"clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
"clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Dataproc service when the job is submitted.
},
"reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
"projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
"jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
},
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
"a_key": "A String",
},
},
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
"driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
"labels": { # [Optional] The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62} Label values must be between 1 and 63 characters long, and must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than 64 labels can be associated with a given job.
"a_key": "A String",
},
"submittedBy": "A String", # [Output-only] The email address of the user submitting the job. For jobs submitted on the cluster, the address is username@hostname.
"driverInputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdin of the job's driver program, only set if the job is interactive.
"driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
"sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class.
"properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"yarnApplications": [ # [Output-only] The collection of YARN applications spun up by this job.
{ # A YARN application created by a job. Application information is a subset of org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto.
"progress": 3.14, # [Required] The numerical progress of the application, from 1 to 100.
"state": "A String", # [Required] The application state.
"name": "A String", # [Required] The application name.
"trackingUrl": "A String", # [Optional] The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
},
],
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
"A String",
],
"properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
"queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
"a_key": "A String",
},
},
"interactive": True or False, # [Optional] If set to `true`, the driver's stdin will be kept open and `driver_input_uri` will be set to provide a path at which additional input can be sent to the driver.
},
}
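  # Illustrative only: one possible minimal value for `body`, sketched as the
  # Python dict you would pass to this method. The field names follow the schema
  # above; the cluster name and queries below are placeholders, not values
  # defined by the API.
  #
  #   body = {
  #       "job": {
  #           "placement": {"clusterName": "example-cluster"},
  #           "hiveJob": {
  #               "queryList": {"queries": ["SHOW DATABASES", "SHOW TABLES"]},
  #           },
  #       },
  #   }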
x__xgafv: string, V1 error format.
Returns:
An object of the form:
{ # A Cloud Dataproc job resource.
"status": { # Cloud Dataproc job status. # [Output-only] The job status. Additional application-specific status information may be contained in the type_job and yarn_applications fields.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
"hadoopJob": { # A Cloud Dataproc job for running Hadoop MapReduce jobs on YARN. # Job is a Hadoop job.
"jarFileUris": [ # [Optional] Jar file URIs to add to the CLASSPATHs of the Hadoop driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Hadoop drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file containing the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Hadoop drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, or .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file containing the main class. Examples: 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' 'hdfs:/tmp/test-samples/custom-wordcount.jar' 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
"properties": { # [Optional] A mapping of property names to values, used to configure Hadoop. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site and classes in user code.
"a_key": "A String",
},
},
"statusHistory": [ # [Output-only] The previous job status.
{ # Cloud Dataproc job status.
"state": "A String", # [Required] A state message specifying the overall job state.
"stateStartTime": "A String", # [Output-only] The time when this state was entered.
"details": "A String", # [Optional] Job state details, such as an error description if the state is ERROR.
},
],
"placement": { # Cloud Dataproc job configuration. # [Required] Job information, including how, when, and where to run the job.
"clusterName": "A String", # [Required] The name of the cluster where the job will be submitted.
"clusterUuid": "A String", # [Output-only] A cluster UUID generated by the Dataproc service when the job is submitted.
},
"reference": { # Encapsulates the full scoping used to reference a job. # [Optional] The fully qualified reference to the job, which can be used to obtain the equivalent REST path of the job resource. If this property is not specified when a job is created, the server generates a job_id.
"projectId": "A String", # [Required] The ID of the Google Cloud Platform project that the job belongs to.
"jobId": "A String", # [Required] The job ID, which must be unique within the project. The job ID is generated by the server upon job submission or provided by the user as a means to perform retries without creating duplicate jobs. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or hyphens (-). The maximum length is 512 characters.
},
"sparkSqlJob": { # A Cloud Dataproc job for running Spark SQL queries. # Job is a SparkSql job.
"queryFileUri": "A String", # The HCFS URI of the script that contains SQL queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Spark SQL command: SET `name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Spark SQL's SparkConf. Properties that conflict with values set by the Cloud Dataproc API may be overwritten.
"a_key": "A String",
},
},
"pigJob": { # A Cloud Dataproc job for running Pig queries on YARN. # Job is a Pig job.
"queryFileUri": "A String", # The HCFS URI of the script that contains the Pig queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Pig command: `name=[value]`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"properties": { # [Optional] A mapping of property names to values, used to configure Pig. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/pig/conf/pig.properties, and classes in user code.
"a_key": "A String",
},
},
"driverOutputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdout of the job's driver program.
"labels": { # [Optional] The labels to associate with this job. Label keys must be between 1 and 63 characters long, and must conform to the following regular expression: \p{Ll}\p{Lo}{0,62} Label values must be between 1 and 63 characters long, and must conform to the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} No more than 64 labels can be associated with a given job.
"a_key": "A String",
},
"submittedBy": "A String", # [Output-only] The email address of the user submitting the job. For jobs submitted on the cluster, the address is username@hostname.
"driverInputResourceUri": "A String", # [Output-only] A URI pointing to the location of the stdin of the job's driver program, only set if the job is interactive.
"driverControlFilesUri": "A String", # [Output-only] If present, the location of miscellaneous control files which may be used as part of job setup and handling. If not present, control files may be placed in the same location as `driver_output_uri`.
"sparkJob": { # A Cloud Dataproc job for running Spark applications on YARN. # Job is a Spark job.
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Spark driver and tasks.
"A String",
],
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Spark drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"mainClass": "A String", # The name of the driver's main class. The jar file that contains the class must be in the default CLASSPATH or specified in `jar_file_uris`.
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of Spark drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"mainJarFileUri": "A String", # The Hadoop Compatible Filesystem (HCFS) URI of the jar file that contains the main class.
"properties": { # [Optional] A mapping of property names to values, used to configure Spark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"yarnApplications": [ # [Output-only] The collection of YARN applications spun up by this job.
{ # A YARN application created by a job. Application information is a subset of org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto.
"progress": 3.14, # [Required] The numerical progress of the application, from 1 to 100.
"state": "A String", # [Required] The application state.
"name": "A String", # [Required] The application name.
"trackingUrl": "A String", # [Optional] The HTTP URL of the ApplicationMaster, HistoryServer, or TimelineServer that provides application-specific information. The URL uses the internal hostname, and requires a proxy server for resolution and, possibly, access.
},
],
"pysparkJob": { # A Cloud Dataproc job for running PySpark applications on YARN. # Job is a Pyspark job.
"mainPythonFileUri": "A String", # [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file.
"args": [ # [Optional] The arguments to pass to the driver. Do not include arguments, such as `--conf`, that can be set as job properties, since a collision may occur that causes an incorrect job submission.
"A String",
],
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the Python driver and tasks.
"A String",
],
"loggingConfiguration": { # The runtime logging configuration of the job. # [Optional] The runtime log configuration for job execution.
"driverLogLevels": { # The per-package log levels for the driver. This may include "root" package name to configure rootLogger. Examples: 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
"a_key": "A String",
},
},
"fileUris": [ # [Optional] HCFS URIs of files to be copied to the working directory of Python drivers and distributed tasks. Useful for naively parallel tasks.
"A String",
],
"archiveUris": [ # [Optional] HCFS URIs of archives to be extracted in the working directory of .jar, .tar, .tar.gz, .tgz, and .zip.
"A String",
],
"pythonFileUris": [ # [Optional] HCFS file URIs of Python files to pass to the PySpark framework. Supported file types: .py, .egg, and .zip.
"A String",
],
"properties": { # [Optional] A mapping of property names to values, used to configure PySpark. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/spark/conf/spark-defaults.conf and classes in user code.
"a_key": "A String",
},
},
"hiveJob": { # A Cloud Dataproc job for running Hive queries on YARN. # Job is a Hive job.
"queryFileUri": "A String", # The HCFS URI of the script that contains Hive queries.
"scriptVariables": { # [Optional] Mapping of query variable names to values (equivalent to the Hive command: `SET name="value";`).
"a_key": "A String",
},
"jarFileUris": [ # [Optional] HCFS URIs of jar files to add to the CLASSPATH of the Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes and UDFs.
"A String",
],
"queryList": { # A list of queries to run on a cluster. # A list of queries.
"queries": [ # [Required] The queries to execute. You do not need to terminate a query with a semicolon. Multiple queries can be specified in one string by separating each with a semicolon. Here is an example of an Cloud Dataproc API snippet that uses a QueryList to specify a HiveJob: "hiveJob": { "queryList": { "queries": [ "query1", "query2", "query3;query4", ] } }
"A String",
],
},
"continueOnFailure": True or False, # [Optional] Whether to continue executing queries if a query fails. The default value is `false`. Setting to `true` can be useful when executing independent parallel queries.
"properties": { # [Optional] A mapping of property names and values, used to configure Hive. Properties that conflict with values set by the Cloud Dataproc API may be overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, /etc/hive/conf/hive-site.xml, and classes in user code.
"a_key": "A String",
},
},
"interactive": True or False, # [Optional] If set to `true`, the driver's stdin will be kept open and `driver_input_uri` will be set to provide a path at which additional input can be sent to the driver.
}</pre>
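<p>For illustration only: a minimal sketch of submitting a job with the <code>google-api-python-client</code> library and polling its status with <code>get</code>. The project ID, cluster name, and Cloud Storage URI below are placeholders, and the sketch assumes Application Default Credentials are available in the environment.</p>
<pre>
from time import sleep

from googleapiclient.discovery import build

# Build a client for the Dataproc v1beta1 API (assumes Application Default
# Credentials are configured for authentication).
dataproc = build('dataproc', 'v1beta1')

project_id = 'my-project'  # placeholder project ID

# Submit a PySpark job to an existing cluster (placeholder names and URIs).
job = dataproc.projects().jobs().submit(
    projectId=project_id,
    body={
        'job': {
            'placement': {'clusterName': 'example-cluster'},
            'pysparkJob': {'mainPythonFileUri': 'gs://example-bucket/word_count.py'},
        },
    },
).execute()

# Poll the job resource until it reaches a terminal state (DONE, ERROR, and
# CANCELLED are assumed terminal here).
job_id = job['reference']['jobId']
while job['status']['state'] not in ('DONE', 'ERROR', 'CANCELLED'):
    sleep(5)
    job = dataproc.projects().jobs().get(projectId=project_id,
                                         jobId=job_id).execute()

print(job['status']['state'])
</pre>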
</div>
</body></html>