# This is the application.conf file that you are meant to modify during this tutorial.
# In the beginning, it has the same contents as application.conf.part-1-solution so that you can test if your local environment is set up correctly.

global {
  spark-options {
    "spark.hadoop.javax.jdo.option.ConnectionURL" = "jdbc:derby://metastore:1527/db;create=true"
    "spark.hadoop.javax.jdo.option.ConnectionDriverName" = "org.apache.derby.jdbc.ClientDriver"
    "spark.hadoop.javax.jdo.option.ConnectionUserName" = "sa"
    "spark.hadoop.javax.jdo.option.ConnectionPassword" = "1234"
    "spark.sql.shuffle.partitions" = 2
    "spark.databricks.delta.snapshotPartitions" = 2
    "spark.driver.host" = "localhost"
  }
}

# Define the Github SDL repo config once, so it can be reused by both Airbyte data objects below:
airbyte_sdl_config = {
  "credentials": {
    "personal_access_token": ""
  },
  "repository": "smart-data-lake/smart-data-lake",
  "start_date": "2021-02-01T00:00:00Z",
  "branch": "documentation develop-spark3 develop-spark2",
  "page_size_for_large_streams": 100
}

dataObjects {

  ext-commits {
    type = AirbyteDataObject
    config = ${airbyte_sdl_config}
    streamName = "commits"
    cmd = {
      type = CmdScript
      name = "airbyte_connector_github"
      linuxCmd = "python3 /mnt/source-github/main.py"
    }
  }

  ext-pr {
    type = AirbyteDataObject
    config = ${airbyte_sdl_config}
    streamName = "pull_requests"
    cmd = {
      type = CmdScript
      name = "airbyte_connector_github"
      linuxCmd = "python3 /mnt/source-github/main.py"
    }
  }

  stg-commits {
    type = DeltaLakeTableDataObject
    path = "~{id}"
    table {
      db = "default"
      name = "stg_commits"
      primaryKey = [created_at]
    }
  }

  stg-pr {
    type = DeltaLakeTableDataObject
    path = "~{id}"
    table {
      db = "default"
      name = "int_pr"
      primaryKey = [id]
    }
  }

  # If you want to run without the metastore container, JSON files can be created instead:
  stg-commits-json {
    type = JsonFileDataObject
    path = "~{id}"
  }

  stg-pr-json {
    type = JsonFileDataObject
    path = "~{id}"
  }
}

actions {

  download-commits {
    type = CopyAction
    inputId = ext-commits
    outputId = stg-commits
    metadata {
      feed = download
    }
  }

  download-pr {
    type = CopyAction
    inputId = ext-pr
    outputId = stg-pr
    metadata {
      feed = download
    }
  }
}
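
# Note: the two Json*DataObject definitions above are not written by any action in this
# part-1 solution. The commented-out block below is only a sketch of how copy actions
# targeting them could look when running without the metastore container. The action ids
# and the feed name "download-json" are assumptions made for illustration, not part of
# the tutorial solution. If uncommented, HOCON merges this block with the actions block
# above, so these actions are added alongside the existing ones.
#
# actions {
#   download-commits-json {
#     type = CopyAction
#     inputId = ext-commits
#     outputId = stg-commits-json
#     metadata {
#       feed = download-json
#     }
#   }
#
#   download-pr-json {
#     type = CopyAction
#     inputId = ext-pr
#     outputId = stg-pr-json
#     metadata {
#       feed = download-json
#     }
#   }
# }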