Getting started with DirectQuery
This tutorial requires access to the DirectQuery modules.
This tutorial will walk you through the basics of DirectQuery by creating an application on an external database.
We’ll see how to:
- Use the DirectQuery module in a project
- Connect to the external database
- Create a data model based on the external database
- Use this data model in a Cube
This tutorial requires:
- An external database
- Familiarity with this database
- Familiarity with the basic concepts of Atoti.
Building the external content
Let’s start by generating the content in the external database
- BigQuery
- ClickHouse
- Databricks
- MS SQL
- Redshift
- Snowflake
- Synapse
Let’s create a new dataset for this tutorial
CREATE SCHEMA tutorial;
Then create the SALES table and feed it with 20 facts:
CREATE TABLE IF NOT EXISTS `tutorial`.SALES (
SALE_ID STRING NOT NULL,
DATE DATE NOT NULL,
SHOP STRING NOT NULL,
PRODUCT STRING NOT NULL,
QUANTITY INT64 NOT NULL,
UNIT_PRICE FLOAT64 NOT NULL
);
INSERT INTO `tutorial`.SALES VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
Let’s create and feed a PRODUCTS table containing information about the products
CREATE TABLE IF NOT EXISTS `tutorial`.PRODUCTS (
PRODUCT_ID STRING NOT NULL,
CATEGORY STRING NOT NULL,
SUB_CATEGORY STRING NOT NULL,
SIZE STRING NOT NULL,
PURCHASE_PRICE FLOAT64 NOT NULL,
COLOR STRING NOT NULL,
BRAND STRING NOT NULL
);
INSERT INTO `tutorial`.PRODUCTS VALUES
('TAB_0','Furniture','Table','1m80',190,'black','Basic'),
('TAB_1','Furniture','Table','2m40',280,'white','Mega'),
('BED_2','Furniture','Bed','Single',127,'red','Mega'),
('TSH_3','Cloth','Tshirt','M',19,'brown','Over'),
('TSH_4','Cloth','Tshirt','L',20,'black','Over'),
('HOO_5','Cloth','Hoodie','M',38,'red','Mega');
Let’s create a new database for this tutorial
CREATE DATABASE IF NOT EXISTS TUTORIAL;
We can then add a PRODUCTS table containing information about our products:
CREATE TABLE IF NOT EXISTS
TUTORIAL.PRODUCTS
(
`PRODUCT_ID` String,
`CATEGORY` String,
`SUB_CATEGORY` String,
`SIZE` String,
`PURCHASE_PRICE` Float64,
`COLOR` String,
`BRAND` String
)
ENGINE = MergeTree()
PRIMARY KEY (`PRODUCT_ID`);
INSERT INTO TUTORIAL.PRODUCTS VALUES
('TAB_0','Furniture','Table','1m80',190,'black','Basic'),
('TAB_1','Furniture','Table','2m40',280,'white','Mega'),
('BED_2','Furniture','Bed','Single',127,'red','Mega'),
('TSH_3','Cloth','Tshirt','M',19,'brown','Over'),
('TSH_4','Cloth','Tshirt','L',20,'black','Over'),
('HOO_5','Cloth','Hoodie','M',38,'red','Mega');
Finally we create the SALES table and feed it with 20 individual sales:
CREATE TABLE IF NOT EXISTS
TUTORIAL.SALES
(
`SALE_ID` String,
`DATE` Date32,
`SHOP` String,
`PRODUCT` String,
`QUANTITY` Float64,
`UNIT_PRICE` Float64
)
ENGINE = MergeTree()
PRIMARY KEY (`SALE_ID`);
INSERT INTO TUTORIAL.SALES VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
Let’s create a new schema for this tutorial
CREATE SCHEMA IF NOT EXISTS tutorial;
We can then add a PRODUCTS table containing information about our products:
CREATE TABLE IF NOT EXISTS
`tutorial`.`products` (
`PRODUCT_ID` STRING NOT NULL,
`CATEGORY` STRING NOT NULL,
`SUB_CATEGORY` STRING NOT NULL,
`SIZE` STRING NOT NULL,
`PURCHASE_PRICE` DOUBLE NOT NULL,
`COLOR` STRING NOT NULL,
`BRAND` STRING NOT NULL
);
INSERT INTO
`tutorial`.`products`
VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
Finally we create the SALES table and feed it with 20 individual sales:
CREATE TABLE IF NOT EXISTS
`tutorial`.`sales`
(
`SALE_ID` STRING,
`DATE` DATE,
`SHOP` STRING,
`PRODUCT` STRING,
`QUANTITY` DOUBLE,
`UNIT_PRICE` DOUBLE
);
INSERT INTO `tutorial`.`sales` VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
Let’s create a new dataset for this tutorial
CREATE SCHEMA TUTORIAL;
Let’s create and feed a PRODUCTS table containing information about the products
if not exists (select * from sys.tables where name = 'PRODUCTS')
CREATE TABLE TUTORIAL.PRODUCTS (
[PRODUCT_ID] VARCHAR(50) NOT NULL,
[CATEGORY] VARCHAR(50) NOT NULL,
[SUB_CATEGORY] VARCHAR(50) NOT NULL,
[SIZE] VARCHAR(50) NOT NULL,
[PURCHASE_PRICE] FLOAT NOT NULL,
[COLOR] VARCHAR(50) NOT NULL,
[BRAND] VARCHAR(50) NOT NULL
);
INSERT INTO
TUTORIAL.PRODUCTS
VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
Then create the SALES table and feed it with 20 facts
CREATE TABLE TUTORIAL.SALES (
[SALE_ID] VARCHAR(50),
[DATE] DATE,
[SHOP] VARCHAR(50),
[PRODUCT] VARCHAR(50),
[QUANTITY] BIGINT,
[UNIT_PRICE] FLOAT
);
INSERT INTO
TUTORIAL.SALES
VALUES
('S0010', '2022-01-31', 'shop_2', 'BED_2', 3, 150),
('S0009', '2022-01-31', 'shop_3', 'BED_2', 1, 150),
('S0008', '2022-01-31', 'shop_4', 'BED_2', 1, 150),
('S0007', '2022-02-01', 'shop_5', 'BED_2', 1, 150),
('S0019', '2022-02-02', 'shop_3', 'HOO_5', 1, 48),
('S0018', '2022-02-03', 'shop_3', 'HOO_5', 1, 48),
('S0017', '2022-02-04', 'shop_3', 'HOO_5', 1, 48),
('S0000', '2022-02-04', 'shop_0', 'TAB_0', 1, 210),
('S0001', '2022-02-03', 'shop_1', 'TAB_0', 1, 210),
('S0002', '2022-02-02', 'shop_2', 'TAB_0', 1, 210),
('S0003', '2022-02-01', 'shop_3', 'TAB_0', 1, 210),
('S0004', '2022-02-03', 'shop_1', 'TAB_1', 1, 300),
('S0005', '2022-02-02', 'shop_2', 'TAB_1', 1, 300),
('S0006', '2022-02-01', 'shop_4', 'TAB_1', 2, 300),
('S0013', '2022-02-01', 'shop_4', 'TSH_3', 1, 22),
('S0012', '2022-02-02', 'shop_5', 'TSH_3', 1, 22),
('S0011', '2022-02-03', 'shop_5', 'TSH_3', 1, 22),
('S0016', '2022-01-31', 'shop_1', 'TSH_4', 2, 24),
('S0015', '2022-02-01', 'shop_2', 'TSH_4', 2, 24),
('S0014', '2022-02-02', 'shop_4', 'TSH_4', 1, 24);
Let’s create a new dataset for this tutorial
CREATE SCHEMA tutorial;
Let’s create and feed a PRODUCTS table containing information about the products
SET enable_case_sensitive_identifier TO true;
CREATE TABLE dev.tutorial."PRODUCTS" (
"PRODUCT_ID" VARCHAR NOT NULL,
"CATEGORY" VARCHAR NOT NULL,
"SUB_CATEGORY" VARCHAR NOT NULL,
"SIZE" VARCHAR NOT NULL,
"PURCHASE_PRICE" FLOAT8 NOT NULL,
"COLOR" VARCHAR NOT NULL,
"BRAND" VARCHAR NOT NULL,
PRIMARY KEY ("PRODUCT_ID"));
INSERT INTO dev.tutorial."PRODUCTS" VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
Then create the SALES table and feed it with 20 facts
SET enable_case_sensitive_identifier TO true;
CREATE TABLE dev.tutorial."SALES" (
"SALE_ID" VARCHAR NOT NULL,
"DATE" DATE NOT NULL,
"SHOP" VARCHAR NOT NULL,
"PRODUCT" VARCHAR NOT NULL,
"QUANTITY" FLOAT8 NOT NULL,
"UNIT_PRICE" FLOAT8 NOT NULL,
PRIMARY KEY ("SALE_ID"), FOREIGN KEY ("PRODUCT") references dev.tutorial."PRODUCTS"("PRODUCT_ID"));
INSERT INTO dev.tutorial."SALES" VALUES
('S0010', '2022-01-31', 'shop_2', 'BED_2', 3, 150),
('S0009', '2022-01-31', 'shop_3', 'BED_2', 1, 150),
('S0008', '2022-01-31', 'shop_4', 'BED_2', 1, 150),
('S0007', '2022-02-01', 'shop_5', 'BED_2', 1, 150),
('S0019', '2022-02-02', 'shop_3', 'HOO_5', 1, 48),
('S0018', '2022-02-03', 'shop_3', 'HOO_5', 1, 48),
('S0017', '2022-02-04', 'shop_3', 'HOO_5', 1, 48),
('S0000', '2022-02-04', 'shop_0', 'TAB_0', 1, 210),
('S0001', '2022-02-03', 'shop_1', 'TAB_0', 1, 210),
('S0002', '2022-02-02', 'shop_2', 'TAB_0', 1, 210),
('S0003', '2022-02-01', 'shop_3', 'TAB_0', 1, 210),
('S0004', '2022-02-03', 'shop_1', 'TAB_1', 1, 300),
('S0005', '2022-02-02', 'shop_2', 'TAB_1', 1, 300),
('S0006', '2022-02-01', 'shop_4', 'TAB_1', 2, 300),
('S0013', '2022-02-01', 'shop_4', 'TSH_3', 1, 22),
('S0012', '2022-02-02', 'shop_5', 'TSH_3', 1, 22),
('S0011', '2022-02-03', 'shop_5', 'TSH_3', 1, 22),
('S0016', '2022-01-31', 'shop_1', 'TSH_4', 2, 24),
('S0015', '2022-02-01', 'shop_2', 'TSH_4', 2, 24),
('S0014', '2022-02-02', 'shop_4', 'TSH_4', 1, 24);
Let’s create a new dataset for this tutorial
CREATE SCHEMA DATABASE_NAME.SCHEMA_NAME;
Let’s create and feed a PRODUCTS table containing information about the products
CREATE TRANSIENT TABLE IF NOT EXISTS PRODUCTS (
  PRODUCT_ID STRING NOT NULL,
  CATEGORY STRING NOT NULL,
  SUB_CATEGORY STRING NOT NULL,
  SIZE STRING NOT NULL,
  PURCHASE_PRICE FLOAT NOT NULL,
  COLOR STRING NOT NULL,
  BRAND STRING NOT NULL,
  PRIMARY KEY (PRODUCT_ID)
);
INSERT INTO PRODUCTS SELECT 'TAB_0','Furniture','Table','1m80',190,'black','Basic';
INSERT INTO PRODUCTS SELECT 'TAB_1','Furniture','Table','2m40',280,'white','Mega';
INSERT INTO PRODUCTS SELECT 'BED_2','Furniture','Bed','Single',127,'red','Mega';
INSERT INTO PRODUCTS SELECT 'TSH_3','Cloth','Tshirt','M',19,'brown','Over';
INSERT INTO PRODUCTS SELECT 'TSH_4','Cloth','Tshirt','L',20,'black','Over';
INSERT INTO PRODUCTS SELECT 'HOO_5','Cloth','Hoodie','M',38,'red','Mega';
Then create the SALES table and feed it with 20 facts:
CREATE TRANSIENT TABLE IF NOT EXISTS SALES (
  SALE_ID STRING NOT NULL,
  DATE DATE NOT NULL,
  SHOP STRING NOT NULL,
  PRODUCT STRING NOT NULL,
  QUANTITY FLOAT NOT NULL,
  UNIT_PRICE FLOAT NOT NULL,
  PRIMARY KEY (SALE_ID),
  FOREIGN KEY (PRODUCT) REFERENCES PRODUCTS(PRODUCT_ID)
);
INSERT INTO SALES SELECT 'S0010','2022-01-31','shop_2','BED_2',3,150;
INSERT INTO SALES SELECT 'S0009','2022-01-31','shop_3','BED_2',1,150;
INSERT INTO SALES SELECT 'S0008','2022-01-31','shop_4','BED_2',1,150;
INSERT INTO SALES SELECT 'S0007','2022-02-01','shop_5','BED_2',1,150;
INSERT INTO SALES SELECT 'S0019','2022-02-02','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0018','2022-02-03','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0017','2022-02-04','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0000','2022-02-04','shop_0','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0001','2022-02-03','shop_1','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0002','2022-02-02','shop_2','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0003','2022-02-01','shop_3','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0004','2022-02-03','shop_1','TAB_1',1,300;
INSERT INTO SALES SELECT 'S0005','2022-02-02','shop_2','TAB_1',1,300;
INSERT INTO SALES SELECT 'S0006','2022-02-01','shop_4','TAB_1',2,300;
INSERT INTO SALES SELECT 'S0013','2022-02-01','shop_4','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0012','2022-02-02','shop_5','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0011','2022-02-03','shop_5','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0016','2022-01-31','shop_1','TSH_4',2,24;
INSERT INTO SALES SELECT 'S0015','2022-02-01','shop_2','TSH_4',2,24;
INSERT INTO SALES SELECT 'S0014','2022-02-02','shop_4','TSH_4',1,24;
Let's create a new dataset for this tutorial.
CREATE SCHEMA TUTORIAL;
We will use CSV files stored in Azure Blob Storage, which we will access through Synapse.
Once those files are loaded into Azure, we can create the external tables in Synapse.
IF NOT EXISTS (SELECT * from sys.external_tables WHERE name='SALES')
CREATE EXTERNAL TABLE test_resources.TUTORIAL.SALES (
[SALE_ID] VARCHAR(50),
[DATE] DATE,
[SHOP] VARCHAR(50),
[PRODUCT] VARCHAR(50),
[QUANTITY] BIGINT,
[UNIT_PRICE] FLOAT
)
WITH (
LOCATION = 'your-container/path/to/tuto/sales.csv',
DATA_SOURCE = [YourDataSource],
FILE_FORMAT = [CsvFormatWithHeader]
)
GO
IF NOT EXISTS (SELECT * from sys.external_tables WHERE name='PRODUCTS')
CREATE EXTERNAL TABLE test_resources.TUTORIAL.PRODUCTS (
[PRODUCT_ID] VARCHAR(50),
[CATEGORY] VARCHAR(50),
[SUB_CATEGORY] VARCHAR(50),
[SIZE] VARCHAR(50),
[PURCHASE_PRICE] FLOAT,
[COLOR] VARCHAR(50),
[BRAND] VARCHAR(50)
)
WITH (
LOCATION = 'your-container/path/to/tutorial/products.csv',
DATA_SOURCE = [YourDataSource],
FILE_FORMAT = [CsvFormatWithHeader]
)
GO
If you have not done so already, you will need to create an External Data Source and an External File Format.
The file format used here needs to skip the CSV headers, as in the following example:
IF NOT EXISTS (SELECT * FROM sys.external_file_formats WHERE name = 'CsvFormatWithHeader')
CREATE EXTERNAL FILE FORMAT [CsvFormatWithHeader]
WITH (
FORMAT_TYPE = DELIMITEDTEXT,
FORMAT_OPTIONS (
FIELD_TERMINATOR = ',',
FIRST_ROW = 2, -- The csv files used have column names
USE_TYPE_DEFAULT = FALSE
)
);
Importing DirectQuery modules
In addition to the classic Atoti modules, you will need to import two more:
- One generic module to interact with external databases
<dependency>
<groupId>com.activeviam.activepivot</groupId>
<artifactId>activepivot-external-database</artifactId>
<version>6.0.15-sb3</version>
</dependency>
- One to connect to your specific database
- BigQuery
- ClickHouse
- Databricks
- MS SQL
- Redshift
- Snowflake
- Synapse
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-bigquery</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-clickhouse</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-databricks</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-mssql</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-redshift</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-snowflake</artifactId>
<version>6.0.15-sb3</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-synapse</artifactId>
<version>6.0.15-sb3</version>
</dependency>
Using the external database in DirectQuery
Create the database configuration
- BigQuery
- ClickHouse
- Databricks
- MS SQL
- Redshift
- Snowflake
- Synapse
Let's start by creating a connection to BigQuery.
In this example we use a service account whose key file is referenced by an environment variable.
For more details and options about authentication on BigQuery, we recommend following the official documentation and reading the Connect to BigQuery page.
final BigQuery bigquery =
BigQueryOptions.newBuilder().setProjectId(PROJECT_ID).build().getService();
This connection can then be wrapped into an Atoti session.
final Session session = Session.createSession(bigquery);
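If you prefer not to rely on the application default credentials picked up from the environment variable, the BigQuery client can also be built with explicit service-account credentials. This is a minimal sketch using the standard Google auth classes (com.google.auth.oauth2.ServiceAccountCredentials, java.io.FileInputStream); the key file path is a placeholder:
// Hypothetical key file path: replace it with the location of your service account key.
final GoogleCredentials credentials =
    ServiceAccountCredentials.fromStream(
        new FileInputStream("/path/to/service-account-key.json"));
final BigQuery bigquery =
    BigQueryOptions.newBuilder()
        .setProjectId(PROJECT_ID)
        .setCredentials(credentials)
        .build()
        .getService();
The resulting client can then be wrapped into an Atoti session exactly as shown above.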
Let's start by creating a configuration which will allow us to connect to the ClickHouse database. HOST is the URL of the server hosting the database; if you run it locally, it can be "localhost". PORT is the port on which the server is listening; the default for ClickHouse is 8123. DATABASE would be "TUTORIAL" based on the previous setup. Finally, USERNAME and PASSWORD should be set appropriately; they can be left null to use the defaults.
final ClickhouseProperties properties =
ClickhouseProperties.builder()
.host(HOST)
.protocol(ClickHouseProtocol.HTTP)
.port(port)
.database("TUTORIAL")
.userName(USERNAME)
.password(PASSWORD)
.build();
Then, we use these properties and the DirectQuery settings to create an Atoti session.
final Session session = Session.createSession(properties, settings);
For more details and options about authentication on ClickHouse, we recommend reading the Connect to ClickHouse page.
Here we define the properties of a Databricks connection.
We connect to the database with user/password authentication.
You can read the Connect to Databricks page for more details about user authentication.
The JDBC connection string also contains parameters setting the user and the database.
final DatabricksConnectionProperties properties =
DatabricksConnectionProperties.builder()
.connectionString(connectionString)
.additionalOption(
DatabricksConnectionProperties.PASSWORD_PROPERTY_KEY, "your-plain-password")
.build();
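For reference, the connectionString used above typically follows the Databricks JDBC format. The values below are placeholders, not settings taken from this tutorial:
// Hypothetical workspace host and HTTP path: replace them with your own values.
// AuthMech=3 selects user/password authentication with the Databricks JDBC driver.
final String connectionString =
    "jdbc:databricks://<workspace-host>:443/default"
        + ";transportMode=http;ssl=1"
        + ";httpPath=<http-path>"
        + ";AuthMech=3;UID=<user>";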
You must also configure some DirectQuery options, or simply use the default settings to begin with:
final DatabricksDirectQuerySettings settings = DatabricksDirectQuerySettings.defaults();
An Atoti session can then be set up by combining these connection properties and settings.
final Session session = Session.createSession(properties, settings);
Let's start by creating a configuration which will allow us to connect to the Microsoft SQL Server database. HOST is the URL of the server hosting the database; if you run it locally, it can be "localhost". PORT is the port on which the server is listening; the default for MS SQL is 1433. DATABASE would be "TUTORIAL" based on the previous setup. Finally, USERNAME and PASSWORD should be set appropriately.
final SQLServerDataSource dataSource = new SQLServerDataSource();
dataSource.setUser(username);
dataSource.setPassword(password);
// Set up the database to connect to
dataSource.setServerName(HOST);
dataSource.setPortNumber(port);
dataSource.setDatabaseName(DATABASE_NAME);
// Setup connection security parameters
dataSource.setEncrypt(true);
dataSource.setTrustServerCertificate(true);
dataSource.setSendTimeAsDatetime(false);
// Setup number of connection attempts (first tries may fail when pool needs to warm up)
dataSource.setConnectRetryCount(CONNECT_RETRY_ATTEMPTS);
dataSource.setConnectRetryInterval(CONNECT_RETRY_INTERVAL);
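Before handing the data source to DirectQuery, you can optionally run a quick connectivity check with plain JDBC; this is just a sanity check and is not required by DirectQuery:
// Optional: fail fast if the server is unreachable or the credentials are wrong.
try (final java.sql.Connection connection = dataSource.getConnection()) {
  System.out.println("Connected to " + connection.getMetaData().getDatabaseProductName());
} catch (final java.sql.SQLException e) {
  throw new IllegalStateException("Could not connect to MS SQL", e);
}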
You must also configure some DirectQuery options, or simply use the default settings to begin with:
final MsSqlDirectQuerySettings settings = MsSqlDirectQuerySettings.defaults();
Then, we use this data source and these settings to create an Atoti session.
final Session session = Session.createSession(dataSource, settings);
For more details and options about authentication on MS SQL, we recommend reading the Connect to MS SQL page.
Here we define the properties of a Redshift connection.
We connect to the database with user/password authentication.
You can read the Connect to Redshift page for more details about user authentication.
The JDBC connection string also contains parameters setting the user and the database.
final RedshiftProperties properties =
RedshiftProperties.builder()
.connectionString(connectionString)
.additionalOption(RedshiftProperty.PASSWORD, "your-plain-password")
.build();
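For reference, the connectionString used above typically follows the Redshift JDBC format, with the database in the path and the user passed as a URL parameter. The cluster endpoint below is a placeholder:
// Hypothetical cluster endpoint: replace it and the user with your own values.
final String connectionString =
    "jdbc:redshift://<cluster-endpoint>:5439/dev?user=<user>";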
You must also configure some DirectQuery options, or simply use the default settings to begin with:
final RedshiftDirectQuerySettings settings = RedshiftDirectQuerySettings.defaults();
An Atoti session can then be set up by combining these connection properties and settings.
final Session session = Session.createSession(properties, settings);
Here we define the properties of a Snowflake connection.
We connect to the database with user/password authentication. Note that we also set a warehouse in the connection properties.
You can read the Connect to Snowflake page for more details about user authentication.
The JDBC connection string also contains parameters setting the user and the database.
final SnowflakeProperties properties =
SnowflakeProperties.builder()
.connectionString(connectionString)
.warehouse("your-warehouse")
.additionalOption(SFSessionProperty.PASSWORD, "your-plain-password")
.build();
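For reference, the connectionString used above typically follows the Snowflake JDBC format, with the user, database and schema passed as URL parameters. The account identifier below is a placeholder:
// Hypothetical account identifier: replace it and the user/database/schema with your own values.
final String connectionString =
    "jdbc:snowflake://<account_identifier>.snowflakecomputing.com/"
        + "?user=<user>&db=<database>&schema=<schema>";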
You must also configure some DirectQuery options, or simply use the default settings to begin with:
final SnowflakeDirectQuerySettings settings = SnowflakeDirectQuerySettings.defaults();
An Atoti session can then be set up by combining these connection properties and settings.
final Session session = Session.createSession(properties, settings);
Let's start by defining some properties to connect to Synapse.
In this example we use SQL authentication.
For more details and options about authentication on Synapse, we recommend reading the Connect to Synapse page.
final SQLServerDataSource dataSource = new SQLServerDataSource();
// Setup authentication
// authenticationMethod = "SqlPassword", "ActiveDirectoryPassword", ... depending on which
// login is used
dataSource.setAuthentication(authenticationMethod);
dataSource.setUser(USERNAME);
dataSource.setPassword(PASSWORD);
// Set up the database to connect to
dataSource.setServerName(SERVER_NAME);
dataSource.setDatabaseName(DATABASE_NAME);
// Setup connection security parameters
dataSource.setEncrypt(true);
dataSource.setHostNameInCertificate(HOST_NAME_IN_CERTIFICATE);
dataSource.setTrustServerCertificate(false);
dataSource.setSendTimeAsDatetime(false);
// Setup number of connection attempts (first tries may fail when pool needs to warm up)
dataSource.setConnectRetryCount(CONNECT_RETRY_ATTEMPTS);
dataSource.setConnectRetryInterval(CONNECT_RETRY_INTERVAL);
dataSource.setLoginTimeout(CONNECT_RETRY_ATTEMPTS * CONNECT_RETRY_INTERVAL);
You must also configure some DirectQuery options, or simply use the default settings to begin with:
final SynapseDirectQuerySettings settings = SynapseDirectQuerySettings.defaults();
Then, we use this data source and these settings to create an Atoti session.
final Session session = Session.createSession(dataSource, settings);
Add external tables into DirectQuery
The second step is to create the external database schema description.
We start by building the table definitions with the help of an external database discoverer.
- BigQuery
- ClickHouse
- Databricks
- MS SQL
- Redshift
- Snowflake
- Synapse
final Table salesTable = session.discoverTable(TableId.of(PROJECT_ID, "tutorial", "SALES"));
final Table productsTable =
session.discoverTable(TableId.of(PROJECT_ID, "tutorial", "PRODUCTS"));
Now we want to create the tables for our application, by discovering their structure on the external database.
final Table salesTable = session.discoverTable("TUTORIAL", "SALES");
final Table productsTable = session.discoverTable("TUTORIAL", "PRODUCTS");
Now we want to create the tables for our application, by discovering their structure on the external database.
final Table salesTable =
session.discoverTable(new SqlTableId(TEST_CATALOG_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final Table productsTable =
session.discoverTable(new SqlTableId(TEST_CATALOG_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
Now we want to create the tables for our application, by discovering their structure on the external database.
final Table salesTable =
session.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "SALES"));
final Table productsTable =
session.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "PRODUCTS"));
Now we want to create the tables for our application, by discovering their structure on the external database.
final Table salesTable =
session.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final Table productsTable =
session.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
In this snippet we use the Snowflake table discoverer to resolve the table definitions from the SQL schema metadata.
final Table salesTable =
session.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final Table productsTable =
session.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
final Table salesTable =
session.discoverTable(new SqlTableId(DATABASE_NAME, "TUTORIAL", "SALES"));
final Table productsTable =
session.discoverTable(new SqlTableId(DATABASE_NAME, "TUTORIAL", "PRODUCTS"));
Create a join between tables
Then we create a join between the tables of our example.
SALES has a many-to-one relationship with PRODUCTS: each sale's PRODUCT field references the PRODUCT_ID field of the PRODUCTS table.
final SqlJoin join =
SqlJoin.builder()
.name("SALES_TO_PRODUCTS")
.fromTable("SALES")
.toTable("PRODUCTS")
.fieldMappings(Set.of(new FieldMapping("PRODUCT", "PRODUCT_ID")))
.build();
Relationship optionality
When defining a join you can set the relationship optionality.
The default relationship is Optional, which means that some rows in the SALES table could reference a product that is missing from the PRODUCTS table.
You can set this relationship to Mandatory if you are sure that all products used in the SALES table are present in the PRODUCTS table.
A Mandatory relationship can lead to better performance in some connector features such as incremental refresh.
Create the database
Combining the table and join definitions, we can now build our schema and the database around it. The following code is identical for all the supported databases:
final Schema schema =
Schema.builder()
.withExternalTables(List.of(salesTable, productsTable), List.of(join))
.build();
Building an app on the external database
We can now build an ActivePivot on top of this database. Again, the following code is identical for all the supported databases:
final ISelectionDescription selection =
StartBuilding.selection(schema).fromBaseStore("SALES").withAllReachableFields().build();
final IActivePivotInstanceDescription cubeDescription =
StartBuilding.cube("MyCube")
.withMeasures(
superMeasureBuilder ->
superMeasureBuilder.withCalculations(
c -> {
// You can use the database fields to define aggregations
Copper.sum("QUANTITY").as("QUANTITY.SUM").publish(c);
Copper.max("UNIT_PRICE").as("Max price").publish(c);
}))
.withDimensions(
builder ->
builder
.withSingleLevelDimensions(
List.of("DATE", "SHOP", "PRODUCT", "SIZE", "BRAND"))
.withDimension("CATEGORY")
.withHierarchyOfSameName()
.withLevels("CATEGORY", "SUB_CATEGORY"))
.build();
final IActivePivotManagerDescription managerDescription =
StartBuilding.managerDescription()
.withName("MyManager")
.withSchema()
.withSelection(selection)
.withCube(cubeDescription)
.build();
final Application app =
session.applicationBuilder().schema(schema).managerDescription(managerDescription).build();
this.resources.register(app);
app.start();
final IActivePivotManager manager = app.getManager();
That's it: the cube is built, and you can run queries on it.
final IActivePivotVersion pivot =
manager.getActivePivot("MyCube").getHead(IEpoch.MASTER_BRANCH_NAME);
final ICellSet result =
ActivePivotQueryRunner.create()
.withWildcardCoordinate("SHOP")
.forMeasures("QUANTITY.SUM")
.run(pivot);
GetAggregatesResultCellset.check(pivot, result).printCellSet();
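As a variation on the same query API, you can slice several measures by the product category level; this sketch reuses the calls shown above and assumes forMeasures accepts several measure names:
// Query QUANTITY.SUM and Max price for every member of the CATEGORY level.
final ICellSet resultByCategory =
    ActivePivotQueryRunner.create()
        .withWildcardCoordinate("CATEGORY")
        .forMeasures("QUANTITY.SUM", "Max price")
        .run(pivot);
GetAggregatesResultCellset.check(pivot, resultByCategory).printCellSet();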