This is an automated email from the ASF dual-hosted git repository.
iwasakims pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bigtop.git
The following commit(s) were added to refs/heads/master by this push:
new b5d2d82cf BIGTOP-4471. Upgrade the Puppet manifest for Airflow to
adopt PostgreSQL and LocalExecutor (#1366)
b5d2d82cf is described below
commit b5d2d82cf14088b81ee938b4caa242b5d75a16fa
Author: Kengo Seki <[email protected]>
AuthorDate: Fri Jul 25 00:13:23 2025 +0900
BIGTOP-4471. Upgrade the Puppet manifest for Airflow to adopt PostgreSQL
and LocalExecutor (#1366)
---
bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml | 8 +-
.../puppet/parser/functions/parse_database_url.rb | 22 ++++++
.../puppet/modules/airflow/manifests/init.pp | 88 ++++++++++++++++++++--
3 files changed, 108 insertions(+), 10 deletions(-)
diff --git a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
index 630ba9166..e6e9f8506 100644
--- a/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
+++ b/bigtop-deploy/puppet/hieradata/bigtop/cluster.yaml
@@ -215,7 +215,7 @@ flink::common::rest_port: "8081"
ranger::admin::admin_password: "Admin01234"
# Airflow
-airflow::server::executor: "SequentialExecutor"
-airflow::server::load_examples: "True"
-airflow::server::sql_alchemy_conn: "sqlite:////var/lib/airflow/airflow.db"
-airflow::server::install_bigpetstore_example: "False"
+airflow::server::executor: "LocalExecutor"
+airflow::server::load_examples: false
+airflow::server::sql_alchemy_conn:
"postgresql+psycopg2://airflow_user:airflow_pass@localhost/airflow_db"
+airflow::server::install_bigpetstore_example: false
diff --git a/bigtop-deploy/puppet/modules/airflow/lib/puppet/parser/functions/parse_database_url.rb b/bigtop-deploy/puppet/modules/airflow/lib/puppet/parser/functions/parse_database_url.rb
new file mode 100644
index 000000000..165dda38d
--- /dev/null
+++ b/bigtop-deploy/puppet/modules/airflow/lib/puppet/parser/functions/parse_database_url.rb
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+require 'uri'
+
+module Puppet::Parser::Functions
+ newfunction(:parse_database_url, :type => :rvalue) do |args|
+ uri = URI.parse(args[0])
+ [uri.scheme.split('+')[0], uri.user, uri.password,
uri.path.delete_prefix('/')]
+ end
+end
diff --git a/bigtop-deploy/puppet/modules/airflow/manifests/init.pp b/bigtop-deploy/puppet/modules/airflow/manifests/init.pp
index 0dd9cb15d..3b7984080 100644
--- a/bigtop-deploy/puppet/modules/airflow/manifests/init.pp
+++ b/bigtop-deploy/puppet/modules/airflow/manifests/init.pp
@@ -20,7 +20,11 @@ class airflow {
}
}
- class server($executor, $load_examples, $sql_alchemy_conn,
$install_bigpetstore_example=False) {
+ class server($executor="SequentialExecutor",
+ $load_examples=true,
+ $sql_alchemy_conn="sqlite:////var/lib/airflow/airflow.db",
+ $install_bigpetstore_example=false,
+ ) {
package { 'airflow':
ensure => latest,
}
@@ -32,11 +36,83 @@ class airflow {
require => Package['airflow'],
}
- exec { 'airflow-db-init':
- command => '/usr/lib/airflow/bin/airflow db init',
- environment => ['AIRFLOW_HOME=/var/lib/airflow'],
- user => 'airflow',
- require => File['/var/lib/airflow/airflow.cfg'],
+ # The built-in scanf function (https://www.puppet.com/docs/puppet/7/function.html#scanf) doesn't work
+ # on Debian and Ubuntu for some reason, so we define a function for parsing SQLAlchemy's database URL
+ # (https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) ourselves here.
+ $parsed_url = parse_database_url($sql_alchemy_conn)
+ $dialect = $parsed_url[0]
+ $username = $parsed_url[1]
+ $password = $parsed_url[2]
+ $database = $parsed_url[3]
+
+ if $dialect =~ /^postgres/ {
+ # Install Airflow's Postgres Provider
+ exec { 'install-postgres-provider':
+ command => ['/usr/lib/airflow/bin/python3', '-m', 'pip',
'install', 'apache-airflow-providers-postgres'],
+ environment => ['AIRFLOW_HOME=/var/lib/airflow'],
+ require => File['/var/lib/airflow/airflow.cfg'],
+ user => 'root',
+ }
+
+ # Install and enable PostgreSQL
+ if $operatingsystem =~ /^(?i:(ubuntu|debian))$/ {
+ package { 'postgresql':
+ ensure => latest,
+ }
+ service { 'postgresql':
+ ensure => running,
+ require => Package['postgresql'],
+ }
+ } else {
+ package { 'postgresql-server':
+ ensure => latest
+ }
+ exec { 'initdb':
+ command => '/usr/bin/pg_ctl initdb -D /var/lib/pgsql/data',
+ user => 'postgres',
+ require => Package['postgresql-server']
+ }
+ service { 'postgresql':
+ ensure => running,
+ require => Exec['initdb']
+ }
+ }
+
+ # Set up Airflow's database backend in accordance with
+ # https://airflow.apache.org/docs/apache-airflow/2.10.5/howto/set-up-database.html#setting-up-a-postgresql-database
+ exec { 'create-airflow-database':
+ command => ["/usr/bin/psql", "-c", "CREATE DATABASE ${database}"],
+ user => 'postgres',
+ require => Service['postgresql']
+ }
+ exec { 'create-airflow-user':
+ command => ["/usr/bin/psql", "-c", "CREATE USER ${username} WITH
PASSWORD '${password}'"],
+ user => 'postgres',
+ require => Exec['create-airflow-database']
+ }
+ exec { 'grant-airflow-privileges':
+ command => ["/usr/bin/psql", "-c", "GRANT ALL PRIVILEGES ON DATABASE
${database} TO ${username}"],
+ user => 'postgres',
+ require => Exec['create-airflow-user'],
+ }
+ exec { 'alter-database-owner':
+ command => ["/usr/bin/psql", "-c", "ALTER DATABASE ${database} OWNER
TO ${username}"],
+ user => 'postgres',
+ require => Exec['grant-airflow-privileges'],
+ }
+ exec { 'airflow-db-init':
+ command => '/usr/lib/airflow/bin/airflow db init',
+ environment => ['AIRFLOW_HOME=/var/lib/airflow'],
+ user => 'airflow',
+ require => Exec['alter-database-owner'],
+ }
+ } else {
+ exec { 'airflow-db-init':
+ command => '/usr/lib/airflow/bin/airflow db init',
+ environment => ['AIRFLOW_HOME=/var/lib/airflow'],
+ user => 'airflow',
+ require => File['/var/lib/airflow/airflow.cfg'],
+ }
}
exec { 'airflow-users-create':