Pages

Tuesday, August 14, 2018

A date dimension for SQL Server

A date dimension for SQL Server

The most common table you will find in a data warehouse will be the date dimension. There is no "right" implementation beyond what the customer needs to solve their business problem. I'm posting a date dimension for SQL Server that I generally find useful as a starting point in the hopes that I quit losing it. Perhaps you'll find it useful or can use the approach to build one more tailored to your environment.

As the comments indicate, this will create: a DW schema, a table named DimDate and then populate the date dimension from 1900-01-01 to 2079-06-06 endpoints inclusive. I also patch in 9999-12-31 as a well known "unknown" date value. Sure, it's odd to have an incomplete year - this is your opportunity to tune the supplied code ;)

-- At the conclusion of this script, there will be
-- A schema named DW
-- A table named DW.DimDate
-- DW.DimDate will be populated with all the days between 1900-01-01 and 2079-06-06 (inclusive)
--   and the sentinel date of 9999-12-31

IF NOT EXISTS
(
    SELECT * FROM sys.schemas AS S WHERE S.name = 'DW'
)
BEGIN
    EXECUTE('CREATE SCHEMA DW AUTHORIZATION dbo;');
END
GO
IF NOT EXISTS
(
    SELECT * FROM sys.schemas AS S INNER JOIN sys.tables AS T ON T.schema_id = S.schema_id
    WHERE S.name = 'DW' AND T.name = 'DimDate'
)
BEGIN
    CREATE TABLE DW.DimDate
    (
        DateSK int NOT NULL
    ,   FullDate date NOT NULL
    ,   CalendarYear int NOT NULL
    ,   CalendarYearText char(4) NOT NULL
    ,   CalendarMonth int NOT NULL
    ,   CalendarMonthText varchar(12) NOT NULL
    ,   CalendarDay int NOT NULL
    ,   CalendarDayText char(2) NOT NULL
    ,   CONSTRAINT PK_DW_DimDate
            PRIMARY KEY CLUSTERED
            (
                DateSK ASC
            )
            WITH (ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, DATA_COMPRESSION = PAGE)
    ,   CONSTRAINT UQ_DW_DimDate UNIQUE (FullDate)
    );
END
GO
WITH 
    -- Define the start and the terminal value
    BOOKENDS(FirstDate, LastDate) AS (SELECT DATEFROMPARTS(1900,1,1), DATEFROMPARTS(9999,12,31))
    -- itzik ben gan rapid number generator
    -- Builds 65537 rows. Need more - follow the pattern
    --  Need fewer rows, add a top below
,    T0 AS 
(
    -- 2
    SELECT 1 AS n
    UNION ALL SELECT 1
)
,    T1 AS
(
    -- 2^2 => 4 
    SELECT 1 AS n
    FROM
        T0
        CROSS APPLY T0 AS TX
)
,    T2 AS 
(
    -- 4^4 => 16
    SELECT 1 AS n
    FROM
        T1
        CROSS APPLY T1 AS TX
)
,    T3 AS 
(
    -- 16^16 => 256
    SELECT 1 AS n
    FROM
        T2
        CROSS APPLY T2 AS TX
)
,    T4 AS
(
    -- 256^256 => 65536
    -- or approx 179 years
    SELECT 1 AS n
    FROM
        T3
        CROSS APPLY T3 AS TX
)
,    T5 AS
(
    -- 65536^65536 => basically infinity
    SELECT 1 AS n
    FROM
        T4
        CROSS APPLY T4 AS TX
)
    -- Assume we now have enough numbers for our purpose
,    NUMBERS AS
(
    -- Add a SELECT TOP (N) here if you need fewer rows
    SELECT
        CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS int) -1 AS number
    FROM
        T4
    UNION 
    -- Build End of time date
    -- Get an N value of 2958463 for
    -- 9999-12-31 assuming start date of 1900-01-01
    SELECT
        ABS(DATEDIFF(DAY, BE.LastDate, BE.FirstDate))
    FROM
        BOOKENDS AS BE
)
, DATES AS
(
SELECT
    PARTS.DateSk
,   FD.FullDate
,   PARTS.CalendarYear
,   PARTS.CalendarYearText
,   PARTS.CalendarMonth
,   PARTS.CalendarMonthText
,   PARTS.CalendarDay
,   PARTS.CalendarDayText
FROM
    NUMBERS AS N
    CROSS APPLY
    (
        SELECT
            DATEADD(DAY, N.number, BE.FirstDate) AS FullDate
        FROM
            BOOKENDS AS BE
    )FD
    CROSS APPLY
    (
        SELECT
            CAST(CONVERT(char(8), FD.FullDate, 112) AS int) AS DateSk
        ,   DATEPART(YEAR, FD.FullDate) AS [CalendarYear] 
        ,   DATENAME(YEAR, FD.FullDate) AS [CalendarYearText]
        ,   DATEPART(MONTH, FD.FullDate) AS [CalendarMonth]
        ,   DATENAME(MONTH, FD.FullDate) AS [CalendarMonthText]
        ,   DATEPART(DAY, FD.FullDate)  AS [CalendarDay]
        ,   DATENAME(DAY, FD.FullDate) AS [CalendarDayText]

    )PARTS
)
INSERT INTO
    DW.DimDate
(
    DateSK
,   FullDate
,   CalendarYear
,   CalendarYearText
,   CalendarMonth
,   CalendarMonthText
,   CalendarDay
,   CalendarDayText
)
SELECT
    D.DateSk
,   D.FullDate
,   D.CalendarYear
,   D.CalendarYearText
,   D.CalendarMonth
,   D.CalendarMonthText
,   D.CalendarDay
,   D.CalendarDayText
FROM
    DATES AS D
WHERE NOT EXISTS
(
    SELECT * FROM DW.DimDate AS DD
    WHERE DD.DateSK = D.DateSk
);

No comments:

Post a Comment