Sentencia INSERT
Crono SQL soporta la sintaxis estándar de la sentencia INSERT:
INSERT INTO dwh.DimProducts( ProductID, Product, ProductCategory, ProductSubCategory, ProductNumber, ProductModel, Color, StandardCost, ListPrice, ProductSize, SizeUnitMeasureCode, Weight, WeightUnitMeasureCode)select ProductID, Product.Name, ProductCategory.name, ProductSubCategory.name, ProductNumber, ProductModel.name, Product.Color, Product.StandardCost, Product.ListPrice, Product.Size, Product.SizeUnitMeasureCode, Product.Weight, Product.WeightUnitMeasureCodeFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductCategory using ProductSubCategory(ProductCategoryId)LEFT JOIN staging.ProductModel using Product(ProductModelID)Ver SQL compilado
INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,StandardCost,ListPrice,ProductSize,SizeUnitMeasureCode,Weight,WeightUnitMeasureCode)SELECT ProductID, Product.Name AS Name, ProductCategory.name AS name, ProductSubCategory.name AS name, ProductNumber, ProductModel.name AS name, Product.Color AS Color, Product.StandardCost AS StandardCost, Product.ListPrice AS ListPrice, Product.Size AS Size, Product.SizeUnitMeasureCode AS SizeUnitMeasureCode, Product.Weight AS Weight, Product.WeightUnitMeasureCode AS WeightUnitMeasureCodeFROM staging.ProductLEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID)LEFT JOIN staging.ProductCategory ON (ProductSubCategory.ProductCategoryId=ProductCategory.ProductCategoryId)LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID);Sin embargo, especialmente cuando la tabla tiene muchas columnas, esta sintaxis es repetitiva e incómoda de mantener. Por ello, Crono SQL prescinde de la cláusula VALUES y asume que el nombre de los campos coincide con el alias de las columnas de la consulta de origen:
INSERT INTO dwh.DimProductsselect ProductID, Product.Name Product, ProductCategory.name ProductCategory, ProductSubCategory.name ProductSubCategory, ProductNumber, ProductModel.name ProductModel, Product.Color, Product.StandardCost ProductCost, Product.ListPrice, Product.Size, Product.SizeUnitMeasureCode, Product.Weight, Product.WeightUnitMeasureCodeFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductCategory using ProductSubCategory(ProductCategoryId)LEFT JOIN staging.ProductModel using Product(ProductModelID)Ver SQL compilado
;WITHquery AS ( SELECT ProductID, Product.Name AS Product, ProductCategory.name AS ProductCategory, ProductSubCategory.name AS ProductSubCategory, ProductNumber, ProductModel.name AS ProductModel, Product.Color AS Color, Product.StandardCost AS ProductCost, Product.ListPrice AS ListPrice, Product.Size AS Size, Product.SizeUnitMeasureCode AS SizeUnitMeasureCode, Product.Weight AS Weight, Product.WeightUnitMeasureCode AS WeightUnitMeasureCode FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductCategory ON (ProductSubCategory.ProductCategoryId=ProductCategory.ProductCategoryId) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID))INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,ProductCost,ListPrice,Size,SizeUnitMeasureCode,Weight,WeightUnitMeasureCode)SELECT query.ProductID, query.Product, query.ProductCategory, query.ProductSubCategory, query.ProductNumber, query.ProductModel, query.Color, query.ProductCost, query.ListPrice, query.Size, query.SizeUnitMeasureCode, query.Weight, query.WeightUnitMeasureCodeFROM query;Por supuesto, para definir el origen de datos del INSERT se pueden utilizar todas las características de la sentencia SELECT de Crono SQL. En la siguiente consulta, por ejemplo, se verifica que las relaciones sean correctas antes de realizar la inserción. Es decir, si las relaciones pierden o duplican registros, no se ejecutará el INSERT.
INSERT INTO dwh.DimProductsselect ProductID, Product.Name Product, ProductCategory.name ProductCategory, ProductSubCategory.name ProductSubCategory, ProductNumber, ProductModel.name ProductModel, Product.Color, Product.StandardCost ProductCost, Product.ListPrice, Product.Size, Product.SizeUnitMeasureCode, Product.Weight, Product.WeightUnitMeasureCodeFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductCategory using ProductSubCategory(ProductCategoryId)LEFT JOIN staging.ProductModel using Product(ProductModelID)CHECK SNOWFLAKEVer SQL compilado
IF EXISTS ( SELECT count(*) FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductCategory ON (ProductSubCategory.ProductCategoryId=ProductCategory.ProductCategoryId) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID) HAVING count(*) <> (SELECT count(*) FROM staging.Product)) THROW 50001,'Las relaciones de esta consulta pierden o duplican registros de Product.',1
;WITHquery AS ( SELECT ProductID, Product.Name AS Product, ProductCategory.name AS ProductCategory, ProductSubCategory.name AS ProductSubCategory, ProductNumber, ProductModel.name AS ProductModel, Product.Color AS Color, Product.StandardCost AS ProductCost, Product.ListPrice AS ListPrice, Product.Size AS Size, Product.SizeUnitMeasureCode AS SizeUnitMeasureCode, Product.Weight AS Weight, Product.WeightUnitMeasureCode AS WeightUnitMeasureCode FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductCategory ON (ProductSubCategory.ProductCategoryId=ProductCategory.ProductCategoryId) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID))INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,ProductCost,ListPrice,Size,SizeUnitMeasureCode,Weight,WeightUnitMeasureCode)SELECT query.ProductID, query.Product, query.ProductCategory, query.ProductSubCategory, query.ProductNumber, query.ProductModel, query.Color, query.ProductCost, query.ListPrice, query.Size, query.SizeUnitMeasureCode, query.Weight, query.WeightUnitMeasureCodeFROM query;También se puede utilizar, por ejemplo, la sentencia COMBINE o la funcionalidad de MATERIALIZE. En la siguiente consulta se verificará que todas las relaciones sean correctas, se crearán entonces las tablas temporales con la información de ventas y compras, y finalmente se unirán mediante un FULL JOIN en un único resultado a insertar.
INSERT INTO dwh.VentasVsComprasCOMBINE BY Product,ProductNumber MATERIALIZE sales ( select Product.Name Product, Product.ProductNumber ProductNumber, sum(PurchaseOrderDetail.LineTotal) Purchases from staging.PurchaseOrderDetail inner join staging.Product using ProductId CHECK SNOWFLAKE), MATERIALIZE purchases ( select Product.Name Product, Product.ProductNumber ProductNumber, sum(SalesOrderDetail.LineTotal) Sales from staging.SalesOrderDetail inner join staging.Product using ProductId CHECK SNOWFLAKE)Ver SQL compilado
-- Materialized query: salesSELECT Product.Name AS Product, Product.ProductNumber AS ProductNumber, sum(PurchaseOrderDetail.LineTotal) AS PurchasesINTO #sales__14C85FROM staging.PurchaseOrderDetailINNER JOIN staging.Product ON (PurchaseOrderDetail.ProductId=Product.ProductId)GROUP BY Product.Name, Product.ProductNumber
-- Materialized query: purchasesSELECT Product.Name AS Product, Product.ProductNumber AS ProductNumber, sum(SalesOrderDetail.LineTotal) AS SalesINTO #purchases__9D591FROM staging.SalesOrderDetailINNER JOIN staging.Product ON (SalesOrderDetail.ProductId=Product.ProductId)GROUP BY Product.Name, Product.ProductNumber
IF EXISTS ( SELECT count(*) FROM staging.PurchaseOrderDetail LEFT JOIN staging.Product ON (PurchaseOrderDetail.ProductId=Product.ProductId) HAVING count(CASE WHEN Product.ProductId IS NOT NULL THEN 1 END) <> (SELECT count(*) FROM staging.PurchaseOrderDetail)) THROW 50001,'Las relaciones de esta consulta pierden o duplican registros de PurchaseOrderDetail.',1
IF EXISTS ( SELECT count(*) FROM staging.SalesOrderDetail LEFT JOIN staging.Product ON (SalesOrderDetail.ProductId=Product.ProductId) HAVING count(CASE WHEN Product.ProductId IS NOT NULL THEN 1 END) <> (SELECT count(*) FROM staging.SalesOrderDetail)) THROW 50001,'Las relaciones de esta consulta pierden o duplican registros de SalesOrderDetail.',1
;WITHquery AS ( SELECT coalesce(sales.Product,purchases.Product) AS Product, coalesce(sales.ProductNumber,purchases.ProductNumber) AS ProductNumber, sales.Purchases AS Purchases, purchases.Sales AS Sales FROM #sales__14C85 sales FULL JOIN #purchases__9D591 purchases ON (sales.Product=purchases.Product AND sales.ProductNumber=purchases.ProductNumber))INSERT dwh.VentasVsCompras(Product,ProductNumber,Purchases,Sales)SELECT query.Product, query.ProductNumber, query.Purchases, query.SalesFROM query;Una necesidad habitual en ETL/DWH es insertar únicamente los registros que no existan en la tabla destino. Para ello, es necesario especificar la “clave de inserción”. La clave de inserción se define mediante el carácter # delante del alias de cada campo. Como veremos a lo largo de este tutorial, las claves de todas las estrategias de carga se definen con el carácter #.
La siguiente consulta inserta los productos que no existan aún en la tabla dwh.DimProducts
INSERT INTO dwh.DimProductsselect ProductID #ProductID, Product.Name Product, ProductCategory.name ProductCategory, ProductSubCategory.name ProductSubCategory, ProductNumber, ProductModel.name ProductModel, Product.Color, Product.StandardCost ProductCost, Product.ListPrice, Product.Size, Product.SizeUnitMeasureCode, Product.Weight, Product.WeightUnitMeasureCodeFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductCategory using ProductSubCategory(ProductCategoryId)LEFT JOIN staging.ProductModel using Product(ProductModelID)Ver SQL compilado
;WITHquery AS ( SELECT ProductID, Product.Name AS Product, ProductCategory.name AS ProductCategory, ProductSubCategory.name AS ProductSubCategory, ProductNumber, ProductModel.name AS ProductModel, Product.Color AS Color, Product.StandardCost AS ProductCost, Product.ListPrice AS ListPrice, Product.Size AS Size, Product.SizeUnitMeasureCode AS SizeUnitMeasureCode, Product.Weight AS Weight, Product.WeightUnitMeasureCode AS WeightUnitMeasureCode FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductCategory ON (ProductSubCategory.ProductCategoryId=ProductCategory.ProductCategoryId) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID))INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,ProductCost,ListPrice,Size,SizeUnitMeasureCode,Weight,WeightUnitMeasureCode)SELECT query.ProductID, query.Product, query.ProductCategory, query.ProductSubCategory, query.ProductNumber, query.ProductModel, query.Color, query.ProductCost, query.ListPrice, query.Size, query.SizeUnitMeasureCode, query.Weight, query.WeightUnitMeasureCodeFROM queryLEFT JOIN dwh.DimProducts ON (DimProducts.ProductID=query.ProductID)WHERE DimProducts.ProductID IS NULL;Es es posible realizar una recarga completa mediante la sentencia DELETE AND INSERT. Esta sentencia elimina el contenido de la tabla y la recarga con los datos de la consulta de origen.
DELETE AND INSERT INTO dwh.DimProductsselect ProductID, Product.Name Product, ProductCategory.name ProductCategory, ProductSubCategory.name ProductSubCategory, ProductNumber, ProductModel.name ProductModel, Product.Color, Product.StandardCost ProductCostFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductModel using Product(ProductModelID)Ver SQL compilado
DELETE FROM dwh.DimProducts;
;WITHquery AS ( SELECT ProductID, Product.Name AS Product, ProductCategory.name AS ProductCategory, ProductSubCategory.name AS ProductSubCategory, ProductNumber, ProductModel.name AS ProductModel, Product.Color AS Color, Product.StandardCost AS ProductCost FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID))INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,ProductCost)SELECT query.ProductID, query.Product, query.ProductCategory, query.ProductSubCategory, query.ProductNumber, query.ProductModel, query.Color, query.ProductCostFROM query;También es posible realizar un TRUNCATE AND INSERT
TRUNCATE AND INSERT INTO dwh.DimProductsselect ProductID, Product.Name Product, ProductCategory.name ProductCategory, ProductSubCategory.name ProductSubCategory, ProductNumber, ProductModel.name ProductModel, Product.Color, Product.StandardCost ProductCostFROM staging.ProductLEFT JOIN staging.ProductSubCategory using Product(ProductSubcategoryID)LEFT JOIN staging.ProductModel using Product(ProductModelID)Ver SQL compilado
TRUNCATE TABLE dwh.DimProducts;
;WITHquery AS ( SELECT ProductID, Product.Name AS Product, ProductCategory.name AS ProductCategory, ProductSubCategory.name AS ProductSubCategory, ProductNumber, ProductModel.name AS ProductModel, Product.Color AS Color, Product.StandardCost AS ProductCost FROM staging.Product LEFT JOIN staging.ProductSubCategory ON (Product.ProductSubcategoryID=ProductSubCategory.ProductSubcategoryID) LEFT JOIN staging.ProductModel ON (Product.ProductModelID=ProductModel.ProductModelID))INSERT dwh.DimProducts(ProductID,Product,ProductCategory,ProductSubCategory,ProductNumber,ProductModel,Color,ProductCost)SELECT query.ProductID, query.Product, query.ProductCategory, query.ProductSubCategory, query.ProductNumber, query.ProductModel, query.Color, query.ProductCostFROM query;La opción PARTITION permite recargar solo una parte de la tabla. Por ejemplo, es habitual cargar solo los movimientos del mes en curso (si sabemos que los demás no han sido modificados). La siguiente sentencia recarga las ventas de los últimos 30 días:
DELETE AND INSERT INTO dwh.FactSalesOrderHeader PARTITION (OrderDate>=getdate()-30)SELECT SalesOrderHeader.SalesOrderId, Customer.CustomerId, cast(SalesOrderHeader.OrderDate as date) OrderDate, SalesOrderHeader.SalesOrderNumber, cast(SalesOrderHeader.DueDate as date) DueDate, cast(SalesOrderHeader.ShipDate as date) ShipDate, SalesOrderHeader.OnlineOrderFlag, SalesOrderHeader.PurchaseOrderNumber, SalesOrderHeader.AccountNumber, SalesOrderHeader.Freight, SalesOrderHeader.CreditCardApprovalCode, SalesOrderHeader.SubTotal Amount, SalesOrderHeader.TaxAmtFROM staging.SalesOrderHeaderINNER JOIN staging.customer using SalesOrderHeader(customerId)where OrderDate>=getdate()-30Ver SQL compilado
DELETE FROM dwh.FactSalesOrderHeaderWHERE FactSalesOrderHeader.OrderDate>=getdate()-30;
;WITHquery AS ( SELECT SalesOrderHeader.SalesOrderId AS SalesOrderId, Customer.CustomerId AS CustomerId, CAST(SalesOrderHeader.OrderDate AS date) AS OrderDate, SalesOrderHeader.SalesOrderNumber AS SalesOrderNumber, CAST(SalesOrderHeader.DueDate AS date) AS DueDate, CAST(SalesOrderHeader.ShipDate AS date) AS ShipDate, SalesOrderHeader.OnlineOrderFlag AS OnlineOrderFlag, SalesOrderHeader.PurchaseOrderNumber AS PurchaseOrderNumber, SalesOrderHeader.AccountNumber AS AccountNumber, SalesOrderHeader.Freight AS Freight, SalesOrderHeader.CreditCardApprovalCode AS CreditCardApprovalCode, SalesOrderHeader.SubTotal AS Amount, SalesOrderHeader.TaxAmt AS TaxAmt FROM staging.SalesOrderHeader INNER JOIN staging.customer ON (SalesOrderHeader.customerId=customer.customerId) WHERE CAST(SalesOrderHeader.OrderDate AS date)>=getdate()-30)INSERT dwh.FactSalesOrderHeader(SalesOrderId,CustomerId,OrderDate,SalesOrderNumber,DueDate,ShipDate,OnlineOrderFlag,PurchaseOrderNumber,AccountNumber,Freight,CreditCardApprovalCode,Amount,TaxAmt)SELECT query.SalesOrderId, query.CustomerId, query.OrderDate, query.SalesOrderNumber, query.DueDate, query.ShipDate, query.OnlineOrderFlag, query.PurchaseOrderNumber, query.AccountNumber, query.Freight, query.CreditCardApprovalCode, query.Amount, query.TaxAmtFROM query;Con el tipo de estrategias descritas en este apartado es posible conseguir que cada tabla se cargue desde una única sentencia, centralizando la lógica de negocio en un único punto, facilitando el desarrollo, y promocionando el seguimiento de buenas prácticas. Los mismos principios y métodos se utilizan en el resto de sentencias DML, como se muestra en los siguientes apartados.