Documentation Index
Fetch the complete documentation index at: https://docs.activeviam.com/llms.txt
Use this file to discover all available pages before exploring further.
Getting started with DirectQuery
This tutorial requires access to the DirectQuery modules.
- Use the DirectQuery module in a project
- Connect to the external database
- Create a data model based on the external database
- Use this data model in a Cube
- A external database
- Familiarity with this database
- Familiarity with the basic concepts of Atoti.
Building the external content
Let’s start by generating the content in the external database- BigQuery
- ClickHouse
- Databricks
- Generic JDBC
- MS SQL
- Redshift
- Snowflake
- Synapse
Let’s create a new dataset for this tutorialThen create the sales table and feed it with 20 factsLet’s create and feed a PRODUCTS table containing information about the products
CREATE SCHEMA tutorial;
CREATE TABLE IF NOT EXISTS `tutorial`.SALES (
SALE_ID STRING NOT NULL,
DATE DATE NOT NULL,
SHOP STRING NOT NULL,
PRODUCT STRING NOT NULL,
QUANTITY INT64 NOT NULL,
UNIT_PRICE FLOAT64 NOT NULL
);
INSERT INTO `tutorial`.SALES VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
CREATE TABLE IF NOT EXISTS `tutorial`.PRODUCTS (
PRODUCT_ID STRING NOT NULL,
CATEGORY STRING NOT NULL,
SUB_CATEGORY STRING NOT NULL,
SIZE STRING NOT NULL,
PURCHASE_PRICE FLOAT64 NOT NULL,
COLOR STRING NOT NULL,
BRAND STRING NOT NULL
);
INSERT INTO `tutorial`.PRODUCTS VALUES
('TAB_0','Furniture','Table','1m80',190,'black','Basic'),
('TAB_1','Furniture','Table','2m40',280,'white','Mega'),
('BED_2','Furniture','Bed','Single',127,'red','Mega'),
('TSH_3','Cloth','Tshirt','M',19,'brown','Over'),
('TSH_4','Cloth','Tshirt','L',20,'black','Over'),
('HOO_5','Cloth','Hoodie','M',38,'red','Mega');
Let’s create a new database for this tutorialWe can then add a PRODUCTS table containing information about our products:Finally we create the SALES table and feed it with 20 individual sales:
CREATE DATABASE IF NOT EXISTS TUTORIAL;
CREATE TABLE IF NOT EXISTS
TUTORIAL.PRODUCTS
(
`PRODUCT_ID` String,
`CATEGORY` String,
`SUB_CATEGORY` String,
`SIZE` String,
`PURCHASE_PRICE` Float64,
`COLOR` String,
`BRAND` String
)
ENGINE = MergeTree()
PRIMARY KEY (`PRODUCT_ID`);
INSERT INTO TUTORIAL.PRODUCTS VALUES
('TAB_0','Furniture','Table','1m80',190,'black','Basic'),
('TAB_1','Furniture','Table','2m40',280,'white','Mega'),
('BED_2','Furniture','Bed','Single',127,'red','Mega'),
('TSH_3','Cloth','Tshirt','M',19,'brown','Over'),
('TSH_4','Cloth','Tshirt','L',20,'black','Over'),
('HOO_5','Cloth','Hoodie','M',38,'red','Mega');
CREATE TABLE IF NOT EXISTS
TUTORIAL.SALES
(
`SALE_ID` String,
`DATE` Date32,
`SHOP` String,
`PRODUCT` String,
`QUANTITY` Float64,
`UNIT_PRICE` Float64
)
ENGINE = MergeTree()
PRIMARY KEY (`SALE_ID`);
INSERT INTO TUTORIAL.SALES VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
Let’s create a new schema for this tutorialWe can then add a PRODUCTS table containing information about our products:Finally we create the SALES table and feed it with 20 individual sales:
CREATE SCHEMA IF NOT EXISTS tutorial
CREATE TABLE IF NOT EXISTS
`tutorial`.`products` (
`PRODUCT_ID` STRING NOT NULL,
`CATEGORY` STRING NOT NULL,
`SUB_CATEGORY` STRING NOT NULL,
`SIZE` STRING NOT NULL,
`PURCHASE_PRICE` DOUBLE NOT NULL,
`COLOR` STRING NOT NULL,
`BRAND` STRING NOT NULL
);
INSERT INTO
`tutorial`.`products`
VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
CREATE TABLE IF NOT EXISTS
`tutorial`.`sales`
(
`SALE_ID` STRING,
`DATE` DATE,
`SHOP` STRING,
`PRODUCT` STRING,
`QUANTITY` DOUBLE,
`UNIT_PRICE` DOUBLE
);
INSERT INTO `tutorial`.`sales` VALUES
('S0010','2022-01-31','shop_2','BED_2',3,150),
('S0009','2022-01-31','shop_3','BED_2',1,150),
('S0008','2022-01-31','shop_4','BED_2',1,150),
('S0007','2022-02-01','shop_5','BED_2',1,150),
('S0019','2022-02-02','shop_3','HOO_5',1,48),
('S0018','2022-02-03','shop_3','HOO_5',1,48),
('S0017','2022-02-04','shop_3','HOO_5',1,48),
('S0000','2022-02-04','shop_0','TAB_0',1,210),
('S0001','2022-02-03','shop_1','TAB_0',1,210),
('S0002','2022-02-02','shop_2','TAB_0',1,210),
('S0003','2022-02-01','shop_3','TAB_0',1,210),
('S0004','2022-02-03','shop_1','TAB_1',1,300),
('S0005','2022-02-02','shop_2','TAB_1',1,300),
('S0006','2022-02-01','shop_4','TAB_1',2,300),
('S0013','2022-02-01','shop_4','TSH_3',1,22),
('S0012','2022-02-02','shop_5','TSH_3',1,22),
('S0011','2022-02-03','shop_5','TSH_3',1,22),
('S0016','2022-01-31','shop_1','TSH_4',2,24),
('S0015','2022-02-01','shop_2','TSH_4',2,24),
('S0014','2022-02-02','shop_4','TSH_4',1,24);
In your database, create a schema named “TUTORIAL”,
and then add 2 tables with the following structure and data. Depending on your database, the actual SQL might need
to be adjusted.A PRODUCTS table containing information about our products:A SALES table with 20 individual sales:
CREATE TABLE TUTORIAL.PRODUCTS (
[PRODUCT_ID] VARCHAR(50) NOT NULL,
[CATEGORY] VARCHAR(50) NOT NULL,
[SUB_CATEGORY] VARCHAR(50) NOT NULL,
[SIZE] VARCHAR(50) NOT NULL,
[PURCHASE_PRICE] FLOAT NOT NULL,
[COLOR] VARCHAR(50) NOT NULL,
[BRAND] VARCHAR(50) NOT NULL
);
INSERT INTO
TUTORIAL.PRODUCTS
VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
CREATE TABLE TUTORIAL.SALES (
[SALE_ID] VARCHAR(50),
[DATE] DATE,
[SHOP] VARCHAR(50),
[PRODUCT] VARCHAR(50),
[QUANTITY] BIGINT,
[UNIT_PRICE] FLOAT
);
INSERT INTO
TUTORIAL.SALES
VALUES
('S0010', '2022-01-31', 'shop_2', 'BED_2', 3, 150),
('S0009', '2022-01-31', 'shop_3', 'BED_2', 1, 150),
('S0008', '2022-01-31', 'shop_4', 'BED_2', 1, 150),
('S0007', '2022-02-01', 'shop_5', 'BED_2', 1, 150),
('S0019', '2022-02-02', 'shop_3', 'HOO_5', 1, 48),
('S0018', '2022-02-03', 'shop_3', 'HOO_5', 1, 48),
('S0017', '2022-02-04', 'shop_3', 'HOO_5', 1, 48),
('S0000', '2022-02-04', 'shop_0', 'TAB_0', 1, 210),
('S0001', '2022-02-03', 'shop_1', 'TAB_0', 1, 210),
('S0002', '2022-02-02', 'shop_2', 'TAB_0', 1, 210),
('S0003', '2022-02-01', 'shop_3', 'TAB_0', 1, 210),
('S0004', '2022-02-03', 'shop_1', 'TAB_1', 1, 300),
('S0005', '2022-02-02', 'shop_2', 'TAB_1', 1, 300),
('S0006', '2022-02-01', 'shop_4', 'TAB_1', 2, 300),
('S0013', '2022-02-01', 'shop_4', 'TSH_3', 1, 22),
('S0012', '2022-02-02', 'shop_5', 'TSH_3', 1, 22),
('S0011', '2022-02-03', 'shop_5', 'TSH_3', 1, 22),
('S0016', '2022-01-31', 'shop_1', 'TSH_4', 2, 24),
('S0015', '2022-02-01', 'shop_2', 'TSH_4', 2, 24),
('S0014', '2022-02-02', 'shop_4', 'TSH_4', 1, 24);
Let’s create a new dataset for this tutorialLet’s create and feed a PRODUCTS table containing information about the productsThen create the SALES table and feed it with 20 facts
CREATE SCHEMA TUTORIAL;
CREATE TABLE TUTORIAL.PRODUCTS (
[PRODUCT_ID] VARCHAR(50) NOT NULL,
[CATEGORY] VARCHAR(50) NOT NULL,
[SUB_CATEGORY] VARCHAR(50) NOT NULL,
[SIZE] VARCHAR(50) NOT NULL,
[PURCHASE_PRICE] FLOAT NOT NULL,
[COLOR] VARCHAR(50) NOT NULL,
[BRAND] VARCHAR(50) NOT NULL
);
INSERT INTO
TUTORIAL.PRODUCTS
VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
CREATE TABLE TUTORIAL.SALES (
[SALE_ID] VARCHAR(50),
[DATE] DATE,
[SHOP] VARCHAR(50),
[PRODUCT] VARCHAR(50),
[QUANTITY] BIGINT,
[UNIT_PRICE] FLOAT
);
INSERT INTO
TUTORIAL.SALES
VALUES
('S0010', '2022-01-31', 'shop_2', 'BED_2', 3, 150),
('S0009', '2022-01-31', 'shop_3', 'BED_2', 1, 150),
('S0008', '2022-01-31', 'shop_4', 'BED_2', 1, 150),
('S0007', '2022-02-01', 'shop_5', 'BED_2', 1, 150),
('S0019', '2022-02-02', 'shop_3', 'HOO_5', 1, 48),
('S0018', '2022-02-03', 'shop_3', 'HOO_5', 1, 48),
('S0017', '2022-02-04', 'shop_3', 'HOO_5', 1, 48),
('S0000', '2022-02-04', 'shop_0', 'TAB_0', 1, 210),
('S0001', '2022-02-03', 'shop_1', 'TAB_0', 1, 210),
('S0002', '2022-02-02', 'shop_2', 'TAB_0', 1, 210),
('S0003', '2022-02-01', 'shop_3', 'TAB_0', 1, 210),
('S0004', '2022-02-03', 'shop_1', 'TAB_1', 1, 300),
('S0005', '2022-02-02', 'shop_2', 'TAB_1', 1, 300),
('S0006', '2022-02-01', 'shop_4', 'TAB_1', 2, 300),
('S0013', '2022-02-01', 'shop_4', 'TSH_3', 1, 22),
('S0012', '2022-02-02', 'shop_5', 'TSH_3', 1, 22),
('S0011', '2022-02-03', 'shop_5', 'TSH_3', 1, 22),
('S0016', '2022-01-31', 'shop_1', 'TSH_4', 2, 24),
('S0015', '2022-02-01', 'shop_2', 'TSH_4', 2, 24),
('S0014', '2022-02-02', 'shop_4', 'TSH_4', 1, 24);
Let’s create a new dataset for this tutorialLet’s create and feed a PRODUCTS table containing information about the productsThen create the SALES table and feed it with 20 facts
CREATE SCHEMA tutorial;
SET enable_case_sensitive_identifier TO true;
CREATE TABLE dev.tutorial."PRODUCTS" (
"PRODUCT_ID" VARCHAR NOT NULL,
"CATEGORY" VARCHAR NOT NULL,
"SUB_CATEGORY" VARCHAR NOT NULL,
"SIZE" VARCHAR NOT NULL,
"PURCHASE_PRICE" FLOAT8 NOT NULL,
"COLOR" VARCHAR NOT NULL,
"BRAND" VARCHAR NOT NULL,
PRIMARY KEY ("PRODUCT_ID"));
INSERT INTO dev.tutorial."PRODUCTS" VALUES
('TAB_0', 'Furniture', 'Table', '1m80', 190, 'black', 'Basic'),
('TAB_1', 'Furniture', 'Table', '2m40', 280, 'white', 'Mega'),
('BED_2', 'Furniture', 'Bed', 'Single', 127, 'red', 'Mega'),
('TSH_3', 'Cloth', 'Tshirt', 'M', 19, 'brown', 'Over'),
('TSH_4', 'Cloth', 'Tshirt', 'L', 20, 'black', 'Over'),
('HOO_5', 'Cloth', 'Hoodie', 'M', 38, 'red', 'Mega');
SET enable_case_sensitive_identifier TO true;
CREATE TABLE dev.tutorial."SALES" (
"SALE_ID" VARCHAR NOT NULL,
"DATE" DATE NOT NULL,
"SHOP" VARCHAR NOT NULL,
"PRODUCT" VARCHAR NOT NULL,
"QUANTITY" FLOAT8 NOT NULL,
"UNIT_PRICE" FLOAT8 NOT NULL,
PRIMARY KEY ("SALE_ID"), FOREIGN KEY ("PRODUCT") references dev.tutorial."PRODUCTS"("PRODUCT_ID"));
INSERT INTO dev.tutorial."SALES" VALUES
('S0010', '2022-01-31', 'shop_2', 'BED_2', 3, 150),
('S0009', '2022-01-31', 'shop_3', 'BED_2', 1, 150),
('S0008', '2022-01-31', 'shop_4', 'BED_2', 1, 150),
('S0007', '2022-02-01', 'shop_5', 'BED_2', 1, 150),
('S0019', '2022-02-02', 'shop_3', 'HOO_5', 1, 48),
('S0018', '2022-02-03', 'shop_3', 'HOO_5', 1, 48),
('S0017', '2022-02-04', 'shop_3', 'HOO_5', 1, 48),
('S0000', '2022-02-04', 'shop_0', 'TAB_0', 1, 210),
('S0001', '2022-02-03', 'shop_1', 'TAB_0', 1, 210),
('S0002', '2022-02-02', 'shop_2', 'TAB_0', 1, 210),
('S0003', '2022-02-01', 'shop_3', 'TAB_0', 1, 210),
('S0004', '2022-02-03', 'shop_1', 'TAB_1', 1, 300),
('S0005', '2022-02-02', 'shop_2', 'TAB_1', 1, 300),
('S0006', '2022-02-01', 'shop_4', 'TAB_1', 2, 300),
('S0013', '2022-02-01', 'shop_4', 'TSH_3', 1, 22),
('S0012', '2022-02-02', 'shop_5', 'TSH_3', 1, 22),
('S0011', '2022-02-03', 'shop_5', 'TSH_3', 1, 22),
('S0016', '2022-01-31', 'shop_1', 'TSH_4', 2, 24),
('S0015', '2022-02-01', 'shop_2', 'TSH_4', 2, 24),
('S0014', '2022-02-02', 'shop_4', 'TSH_4', 1, 24);
Let’s create a new dataset for this tutorialLet’s create and feed a PRODUCTS table containing information about the productsThen create the sales table and feed it with 20 facts
CREATE SCHEMA DATABASE_NAME.SCHEMA_NAME;
CREATE TRANSIENT TABLE IF NOT EXISTS PRODUCTS ( PRODUCT_ID STRING NOT NULL,CATEGORY STRING NOT NULL,SUB_CATEGORY STRING NOT NULL,SIZE STRING NOT NULL,PURCHASE_PRICE FLOAT NOT NULL,COLOR STRING NOT NULL,BRAND STRING NOT NULL, PRIMARY KEY (PRODUCT_ID));
INSERT INTO PRODUCTS SELECT 'TAB_0','Furniture','Table','1m80',190,'black','Basic';
INSERT INTO PRODUCTS SELECT 'TAB_1','Furniture','Table','2m40',280,'white','Mega';
INSERT INTO PRODUCTS SELECT 'BED_2','Furniture','Bed','Single',127,'red','Mega';
INSERT INTO PRODUCTS SELECT 'TSH_3','Cloth','Tshirt','M',19,'brown','Over';
INSERT INTO PRODUCTS SELECT 'TSH_4','Cloth','Tshirt','L',20,'black','Over';
INSERT INTO PRODUCTS SELECT 'HOO_5','Cloth','Hoodie','M',38,'red','Mega';
CREATE TRANSIENT TABLE IF NOT EXISTS SALES ( SALE_ID STRING NOT NULL,DATE DATE NOT NULL,SHOP STRING NOT NULL,PRODUCT STRING NOT NULL,QUANTITY FLOAT NOT NULL,UNIT_PRICE FLOAT NOT NULL,PRIMARY KEY (SALE_ID), FOREIGN KEY (PRODUCT) references PRODUCTS(PRODUCT_ID));
INSERT INTO SALES SELECT 'S0010','2022-01-31','shop_2','BED_2',3,150;
INSERT INTO SALES SELECT 'S0009','2022-01-31','shop_3','BED_2',1,150;
INSERT INTO SALES SELECT 'S0008','2022-01-31','shop_4','BED_2',1,150;
INSERT INTO SALES SELECT 'S0007','2022-02-01','shop_5','BED_2',1,150;
INSERT INTO SALES SELECT 'S0019','2022-02-02','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0018','2022-02-03','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0017','2022-02-04','shop_3','HOO_5',1,48;
INSERT INTO SALES SELECT 'S0000','2022-02-04','shop_0','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0001','2022-02-03','shop_1','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0002','2022-02-02','shop_2','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0003','2022-02-01','shop_3','TAB_0',1,210;
INSERT INTO SALES SELECT 'S0004','2022-02-03','shop_1','TAB_1',1,300;
INSERT INTO SALES SELECT 'S0005','2022-02-02','shop_2','TAB_1',1,300;
INSERT INTO SALES SELECT 'S0006','2022-02-01','shop_4','TAB_1',2,300;
INSERT INTO SALES SELECT 'S0013','2022-02-01','shop_4','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0012','2022-02-02','shop_5','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0011','2022-02-03','shop_5','TSH_3',1,22;
INSERT INTO SALES SELECT 'S0016','2022-01-31','shop_1','TSH_4',2,24;
INSERT INTO SALES SELECT 'S0015','2022-02-01','shop_2','TSH_4',2,24;
INSERT INTO SALES SELECT 'S0014','2022-02-02','shop_4','TSH_4',1,24;
Let’s create a new dataset for this tutorial.We will use csv files and in Azure Blob Storage that we will access through Synapse:Once those files are loaded into Azure,
we can create the external tables in Synapse.If you have not already, you will need to create an External Data Source
and an External File Format.
CREATE SCHEMA TUTORIAL;
IF NOT EXISTS (SELECT * from sys.external_tables WHERE name='SALES')
CREATE EXTERNAL TABLE test_resources.TUTORIAL.SALES (
[SALE_ID] VARCHAR(50),
[DATE] DATE,
[SHOP] VARCHAR(50),
[PRODUCT] VARCHAR(50),
[QUANTITY] BIGINT,
[UNIT_PRICE] FLOAT
)
WITH (
LOCATION = 'your-container/path/to/tuto/sales.csv',
DATA_SOURCE = [YourDataSource],
FILE_FORMAT = [CsvFormatWithHeader]
)
GO
IF NOT EXISTS (SELECT * from sys.external_tables WHERE name='PRODUCTS')
CREATE EXTERNAL TABLE test_resources.TUTORIAL.PRODUCTS (
[PRODUCT_ID] VARCHAR(50),
[CATEGORY] VARCHAR(50),
[SUB_CATEGORY] VARCHAR(50),
[SIZE] VARCHAR(50),
[PURCHASE_PRICE] FLOAT,
[COLOR] VARCHAR(50),
[BRAND] VARCHAR(50)
)
WITH (
LOCATION = 'your-container/path/to/tutorial/products.csv',
DATA_SOURCE = [YourDataSource],
FILE_FORMAT = [CsvFormatWithHeader]
)
GO
Importing DirectQuery modules
Additionally to classic Atoti modules you will need to import two modules:- One generic module to interact with external databases
<dependency>
<groupId>com.activeviam.activepivot</groupId>
<artifactId>activepivot-external-database</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-application</artifactId>
<version>${project.version}</version>
</dependency>
- One to connect to your specific database
- BigQuery
- ClickHouse
- Databricks
- Generic JDBC
- MS SQL
- Redshift
- Snowflake
- Synapse
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-bigquery</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-clickhouse</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-databricks</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-generic-jdbc</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-mssql</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-redshift</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-snowflake</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.activeviam.directquery</groupId>
<artifactId>api-synapse</artifactId>
<version>${project.version}</version>
</dependency>
Using the external database in DirectQuery
Create the database connector
- BigQuery
- ClickHouse
- Databricks
- Generic JDBC
- MS SQL
- Redshift
- Snowflake
- Synapse
Let’s start by creating a connection to BigQuery.
In this example we use a service account and have set an environment variable.
For more details and options about authentication on BigQuery we recommend to follow the official documentation and read the Connect to BigQuery page.This connection can then be used to create a DirectQuery connector.
In this example we use a service account and have set an environment variable.
For more details and options about authentication on BigQuery we recommend to follow the official documentation and read the Connect to BigQuery page.
final BigQuery bigquery =
BigQueryOptions.newBuilder().setProjectId(PROJECT_ID).build().getService();
final DirectQueryConnector<BigqueryDatabaseSettings> connector =
BigqueryConnectorFactory.INSTANCE.createConnector(
BigqueryClientSettings.builder().bigQuery(bigquery).build());
Let’s start by creating a configuration which will allow us to connect to the ClickHouse database.
HOST is the URL of the server hosting the database. If you run it locally, it can be “localhost”.
PORT is the port on which the server is listening. The default for ClickHouse is 8123.
DATABASE would be “TUTORIAL” based on the previous setup.
Finally, USERNAME and PASSWORD should be set appropriately. By default, they could be left null.Then, we use these properties to create a DirectQuery connector.For more details and options about authentication on ClickHouse we recommend to read the
Connect to ClickHouse page.
final ClickhousePropertiesV2 properties =
ClickhousePropertiesV2.builder()
.host(HOST)
.protocol(Protocol.HTTP)
.port(port)
.database("TUTORIAL")
.userName(USERNAME)
.password(PASSWORD)
.build();
final ClickhouseClientSettings clientSettings =
ClickhouseClientSettings.builder().propertiesV2(properties).build();
final DirectQueryConnector<ClickhouseDatabaseSettings> connector =
ClickhouseConnectorFactory.INSTANCE.createConnector(clientSettings);
Here we define properties of a Databricks connection.
We connect to a database with a user password authentication.
You can read Connect to Databricks page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.A DirectQuery connector can be set up combining
JDBC connection string and these properties.
We connect to a database with a user password authentication.
You can read Connect to Databricks page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.
final DatabricksConnectionProperties properties =
DatabricksConnectionProperties.builder()
.connectionString(connectionString)
.additionalOption(
DatabricksConnectionProperties.PASSWORD_PROPERTY_KEY, "your-plain-password")
.build();
final DatabricksClientSettings clientSettings =
DatabricksClientSettings.builder().connectionProperties(properties).build();
final DirectQueryConnector<DatabricksDatabaseSettings> connector =
DatabricksConnectorFactory.INSTANCE.createConnector(clientSettings);
First, one needs to implement a class implementing
For more details about the dialect, we recommend to read the Generic JDBC page.
Here we define properties of a generic JDBC connection.
We connect to a database with a user password authentication.
A DirectQuery connector can be set up combining this dialect and these properties and these settings.
ISqlDialect; this class is the key component for
DirectQuery to generate SQL queries which can be interpreted by your database.For more details about the dialect, we recommend to read the Generic JDBC page.
final SqlDialect sqlDialect = MsSqlSqlDialect.sqlDialect();
We connect to a database with a user password authentication.
final GenericJdbcProperties properties =
GenericJdbcProperties.builder()
.connectionString(connectionString)
.additionalOption("user", user)
.additionalOption("password", password)
.additionalOption("trustServerCertificate", "true")
.additionalOption("database", DATABASE_NAME)
.build();
final GenericJdbcClientSettings clientSettings =
GenericJdbcClientSettings.builder().properties(properties).build();
final DirectQueryConnector<GenericJdbcDatabaseSettings> connector =
GenericJdbcConnectorMetaFactory.createConnectorFactory(sqlDialect)
.createConnector(clientSettings);
Let’s start by creating a configuration which will allow us to connect to the Microsoft SQL Server database.
HOST is the URL of the server hosting the database. If you run it locally, it can be “localhost”.
PORT is the port on which the server is listening. The default for MS SQL is 1443.
DATABASE would be “TUTORIAL” based on the previous setup.
Finally, USERNAME and PASSWORD should be set appropriately.Then, we use these properties to create a DirectQuery connector.For more details and options about authentication on ClickHouse we recommend to read the
Connect to MS SQL page.
final SQLServerDataSource dataSource = new SQLServerDataSource();
dataSource.setUser(username);
dataSource.setPassword(password);
// Set up the database to connect to
dataSource.setPortNumber(port);
dataSource.setDatabaseName(DATABASE_NAME);
// Setup connection security parameters
dataSource.setEncrypt("true");
dataSource.setTrustServerCertificate(true);
dataSource.setSendTimeAsDatetime(false);
// Setup number of connection attempts (first tries may fail when pool needs to warm up)
dataSource.setConnectRetryCount(CONNECT_RETRY_ATTEMPTS);
dataSource.setConnectRetryInterval(CONNECT_RETRY_INTERVAL);
final MsSqlClientSettings clientSettings =
MsSqlClientSettings.builder().dataSource(dataSource).build();
final DirectQueryConnector<MsSqlDatabaseSettings> connector =
MsSqlConnectorFactory.INSTANCE.createConnector(clientSettings);
Here we define properties of a Redshift connection.
We connect to a database with a user password authentication.
You can read Connect to Redshift page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.Then, we use these properties to create a DirectQuery connector.
We connect to a database with a user password authentication.
You can read Connect to Redshift page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.
RedshiftProperties properties =
RedshiftProperties.builder()
.connectionString(connectionString)
.additionalOption(RedshiftProperty.PASSWORD, "your-plain-password")
.build();
final RedshiftClientSettings clientSettings =
RedshiftClientSettings.builder().properties(properties).build();
final DirectQueryConnector<RedshiftDatabaseSettings> connector =
RedshiftConnectorFactory.INSTANCE.createConnector(clientSettings);
Here we define properties of a Snowflake connection.
We connect to a database with a user password authentication. Note that we set a warehouse in this connection properties.
You can read Connect to Snowflake page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.An Atoti DirectQuery connector can be created using these properties.
We connect to a database with a user password authentication. Note that we set a warehouse in this connection properties.
You can read Connect to Snowflake page for more details about user authentication. The JDBC connection string also contains parameters to set the user and the database.
final SnowflakeProperties properties =
SnowflakeProperties.builder()
.connectionString(connectionString)
.warehouse("your-warehouse")
.additionalOption(SFSessionProperty.PASSWORD.getPropertyKey(), "your-plain-password")
.build();
final SnowflakeClientSettings clientSettings =
SnowflakeClientSettings.builder().properties(properties).build();
final DirectQueryConnector<SnowflakeDatabaseSettings> connector =
SnowflakeConnectorFactory.INSTANCE.createConnector(clientSettings);
Let’s start by defining some properties to connect to Synapse.
In this example we use SQL authentication.
For more details and options about authentication on Synapse we recommend to read the Connect to Synapse page.Then, we use this data source to create a DirectQuery connector.
In this example we use SQL authentication.
For more details and options about authentication on Synapse we recommend to read the Connect to Synapse page.
final SQLServerDataSource dataSource = new SQLServerDataSource();
// Setup authentication
// authenticationMethod = "SqlPassword", "ActiveDirectoryPassword", ... depending on which login
// are used
dataSource.setAuthentication(authenticationMethod);
dataSource.setUser(USERNAME);
dataSource.setPassword(PASSWORD);
// Set up the database to connect to
dataSource.setServerName(SERVER_NAME);
dataSource.setDatabaseName(DATABASE_NAME);
// Setup connection security parameters
dataSource.setEncrypt("true");
dataSource.setHostNameInCertificate(HOST_NAME_IN_CERTIFICATE);
dataSource.setTrustServerCertificate(false);
dataSource.setSendTimeAsDatetime(false);
// Setup number of connection attempts (first tries may fail when pool needs to warm up)
dataSource.setConnectRetryCount(CONNECT_RETRY_ATTEMPTS);
dataSource.setConnectRetryInterval(CONNECT_RETRY_INTERVAL);
dataSource.setLoginTimeout(CONNECT_RETRY_ATTEMPTS * CONNECT_RETRY_INTERVAL);
final SynapseClientSettings clientSettings =
SynapseClientSettings.builder().dataSource(dataSource).build();
final DirectQueryConnector<SynapseDatabaseSettings> connector =
SynapseConnectorFactory.INSTANCE.createConnector(clientSettings);
Add external tables into DirectQuery
The second step is to create the external database schema description.We start by building the table definition. This can be done either with the help of an external database discoverer or by manually building the description of the tables. Let’s first create a discoverer from the connector. Note that there are settings for the discoverer to customize the behavior but in this example we will use the default ones.
- BigQuery
- ClickHouse
- Databricks
- Generic JDBC
- MS SQL
- Redshift
- Snowflake
- Synapse
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(PROJECT_ID, "tutorial", "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new SqlTableId(PROJECT_ID, "tutorial", "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new ClickhouseTableId("TUTORIAL", "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new ClickhouseTableId("TUTORIAL", "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(CATALOG_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new SqlTableId(CATALOG_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, SCHEMA_NAME, "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(
new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(
new SqlTableId(TEST_DATABASE_NAME, TUTORIAL_SCHEMA_NAME, "PRODUCTS"));
final IDirectQueryTableDiscoverer discoverer = connector.getDiscoverer();
final TableDescription salesTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, "TUTORIAL", "SALES"));
final TableDescription productsTable =
discoverer.discoverTable(new SqlTableId(DATABASE_NAME, "TUTORIAL", "PRODUCTS"));
For more information about creating table manually without discovery read the Create tables page.
Create a join between tables
Then we create a join between the tables of our example.Sales has a many-to-one relationship with Products on the field product_id.
final JoinDescription join =
JoinDescription.builder()
.name("SALES_TO_PRODUCTS")
.sourceTableName("SALES")
.targetTableName("PRODUCTS")
.fieldMappings(Set.of(new FieldMapping("PRODUCT", "PRODUCT_ID")))
.build();
Relationship optionality
When defining a join you can set the Relation Optionality.The default relationship is
Optional, it means that some rows in Sales table could have a product which is
missing from the Products table.You can set this relationship to
Mandatory, if you are sure that all products used in Sales table are in Products
table.Mandatory relationship could lead to better performance in some connector features
like Incremental refresh.
Create the schema
Combining tables and join definitions, we can now build our schema description.final SchemaDescription schema =
SchemaDescription.builder()
.externalTables(List.of(salesTable, productsTable))
.externalJoins(List.of(join))
.build();
Building an app on the external database
We can now build an Atoti cube manager description as usual.final ISelectionDescription selection =
StartBuilding.selection(schema).fromBaseStore("SALES").withAllReachableFields().build();
final IActivePivotInstanceDescription cubeDescription =
StartBuilding.cube("MyCube")
.withMeasures(
superMeasureBuilder ->
superMeasureBuilder.withCalculations(
c -> {
// You can use the database fields to define aggregations
Copper.sum("QUANTITY").as("QUANTITY.SUM").publish(c);
Copper.max("UNIT_PRICE").as("Max price").publish(c);
}))
.withDimensions(
builder ->
builder
.withSingleLevelDimensions(
List.of("DATE", "SHOP", "PRODUCT", "SIZE", "BRAND"))
.withDimension("CATEGORY")
.withHierarchyOfSameName()
.withLevels("CATEGORY", "SUB_CATEGORY"))
.build();
final IActivePivotManagerDescription managerDescription =
StartBuilding.managerDescription()
.withName("MyManager")
.withSchema()
.withSelection(selection)
.withCube(cubeDescription)
.build();
final Application app =
Application.builder(connector)
.schema(schema)
.managerDescription(managerDescription)
.build();
app.start();
final IActivePivotManager manager = app.getManager();
final IActivePivotVersion pivot =
manager.getActivePivot("MyCube").getHead(IEpoch.MASTER_BRANCH_NAME);
final ICellSet result =
IActivePivotQueryRunner.create()
.withWildcardCoordinate(LevelIdentifier.simple("SHOP"))
.forMeasures("QUANTITY.SUM")
.run(pivot);
GetAggregatesResultCellset.check(pivot, result).printCellSet();