IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[vGetVisits]') AND type in (N'U'))
DROP TABLE [dbo].[vGetVisits]
GO
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
CONSTRAINT [PK_vGetVisits] PRIMARY KEY CLUSTERED
(
[id] ASC
)
)
GO
INSERT INTO [dbo].[vGetVisits]([id], [mydate])
VALUES
(1, '2014-01-01 11:00'),
(2, '2014-01-03 10:00'),
(3, '2014-01-04 09:30'),
(4, '2014-04-01 10:00'),
(5, '2014-05-01 11:00'),
(6, '2014-07-01 09:00'),
(7, '2014-07-31 08:00');
GO
-- Clean up
IF OBJECT_ID (N'dbo.udfLastHitRecursive', N'FN') IS NOT NULL
DROP FUNCTION udfLastHitRecursive;
GO
-- Actual Function
CREATE FUNCTION dbo.udfLastHitRecursive
( @MyDate datetime)
RETURNS TINYINT
AS
BEGIN
-- Your returned value 1 or 0
DECLARE @Returned_Value TINYINT;
SET @Returned_Value=0;
-- Prepare gaps table to be used.
WITH gaps AS
(
-- Select Date and MaxDiff from the original table
SELECT
CONVERT(Date,mydate) AS [date]
, DATEDIFF(day,ISNULL(LAG(mydate, 1) OVER (ORDER BY mydate), mydate) , mydate) AS [MaxDiff]
FROM dbo.vGetVisits
)
SELECT @Returned_Value=
(SELECT DISTINCT -- DISTINCT in case we have same date but different time
CASE WHEN
(
-- It is a first entry
[date]=(SELECT MIN(CONVERT(Date,mydate)) FROM dbo.vGetVisits))
OR
/*
--Gap between last qualifying date and entered is greater than 90
Calculate Running sum upto and including required date
and find a remainder of division by 91.
*/
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
)%91 -
/*
ISNULL added to include first value that always returns NULL
Calculate Running sum upto and NOT including required date
and find a remainder of division by 91
*/
ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
)%91, 0) -- End ISNULL
<0 )
/* End Running sum upto and including required date */
OR
-- Gap between two nearest dates is greater than 90
((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<=t2.[date]
) t1
) - ISNULL((SELECT SUM(t1.MaxDiff)
FROM (SELECT [MaxDiff] FROM gaps WHERE [date]<t2.[date]
) t1
), 0) > 90)
THEN 1
ELSE 0
END
AS [Qualifying]
FROM gaps t2
WHERE [date]=CONVERT(Date,@MyDate))
-- What is neccesary to return when entered date is not in dbo.vGetVisits?
RETURN @Returned_Value
END
GO
SELECT
dbo.udfLastHitRecursive(mydate) AS [Qualifying]
, [id]
, mydate
FROM dbo.vGetVisits
ORDER BY mydate
Resultado
También eche un vistazo a Cómo calcular el total acumulado en SQL Server
actualización: vea a continuación los resultados de las pruebas de rendimiento.
Debido a la lógica diferente utilizada para encontrar el "intervalo de 90 días", los ypercube y mis soluciones si se dejan intactos pueden devolver resultados diferentes a la solución de Paul White. Esto se debe al uso de las funciones DATEDIFF y DATEADD respectivamente.
Por ejemplo:
SELECT DATEADD(DAY, 90, '2014-01-01 00:00:00.000')
devuelve '2014-04-01 00: 00: 00.000', lo que significa que '2014-04-01 01: 00: 00.000' tiene más de 90 días de diferencia
pero
SELECT DATEDIFF(DAY, '2014-01-01 00:00:00.000', '2014-04-01 01:00:00.000')
Devuelve '90', lo que significa que todavía está dentro del espacio.
Considere un ejemplo de un minorista. En este caso, está bien vender un producto perecedero que tenga fecha de vencimiento '2014-01-01' en '2014-01-01 23: 59: 59: 999'. Entonces el valor DATEDIFF (DÍA, ...) en este caso está bien.
Otro ejemplo es un paciente que espera ser visto. Para alguien que viene en '2014-01-01 00: 00: 00: 000' y se va en '2014-01-01 23: 59: 59: 999' son 0 (cero) días si se usa DATEDIFF aunque La espera real fue de casi 24 horas. Una vez más, el paciente que llega a '2014-01-01 23:59:59' y se retira a '2014-01-02 00:00:01' esperó un día si se usa DATEDIFF.
Pero yo divago.
Dejé las soluciones DATEDIFF e incluso el rendimiento las probó, pero realmente deberían estar en su propia liga.
También se observó que para los grandes conjuntos de datos es imposible evitar los valores del mismo día. Entonces, si tenemos 13 millones de registros que abarcan 2 años de datos, terminaremos teniendo más de un registro durante algunos días. Esos registros se filtran lo antes posible en las soluciones DATEDIFF de mi y ypercube. Espero que a Ypercube no le importe esto.
Las soluciones se probaron en la siguiente tabla
CREATE TABLE [dbo].[vGetVisits](
[id] [int] NOT NULL,
[mydate] [datetime] NOT NULL,
)
con dos índices agrupados diferentes (mydate en este caso):
CREATE CLUSTERED INDEX CI_mydate on vGetVisits(mydate)
GO
La tabla se rellenó de la siguiente manera
SET NOCOUNT ON
GO
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (1, '01/01/1800')
GO
DECLARE @i bigint
SET @i=2
DECLARE @MaxRows bigint
SET @MaxRows=13001
WHILE @i<@MaxRows
BEGIN
INSERT INTO dbo.vGetVisits(id, mydate)
VALUES (@i, DATEADD(day,FLOOR(RAND()*(3)),(SELECT MAX(mydate) FROM dbo.vGetVisits)))
SET @i=@i+1
END
Para un caso de filas multimillonarias, INSERT se cambió de tal manera que se agregaron aleatoriamente entradas de 0-20 minutos.
Todas las soluciones fueron cuidadosamente envueltas en el siguiente código
SET NOCOUNT ON
GO
DECLARE @StartDate DATETIME
SET @StartDate = GETDATE()
--- Code goes here
PRINT 'Total milliseconds: ' + CONVERT(varchar, DATEDIFF(ms, @StartDate, GETDATE()))
Códigos reales probados (sin ningún orden en particular):
Solución DATEDIFF de Ypercube ( YPC, DATEDIFF )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Qualify INT = 0,
@PreviousCheckDate DATETIME = '1799-01-01 00:00:00'
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
mydate
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM
dbo.vGetVisits) Actions
WHERE
RowNum = 1
ORDER BY
mydate;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
WHILE @@FETCH_STATUS = 0
BEGIN
SET @Qualify = CASE WHEN DATEDIFF(day, @PreviousCheckDate, @Thedate) > 90 THEN 1 ELSE 0 END ;
IF @Qualify=1
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @PreviousCheckDate=@TheDate
END
FETCH NEXT FROM c INTO @TheDate ;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
Solución DATEADD de Ypercube ( YPC, DATEADD )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY,
Qualify INT NOT NULL
);
DECLARE
@TheDate DATETIME,
@Next_Date DATETIME,
@Interesting_Date DATETIME,
@Qualify INT = 0
DECLARE c CURSOR
LOCAL STATIC FORWARD_ONLY READ_ONLY
FOR
SELECT
[mydate]
FROM [test].[dbo].[vGetVisits]
ORDER BY mydate
;
OPEN c ;
FETCH NEXT FROM c INTO @TheDate ;
SET @Interesting_Date=@TheDate
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
WHILE @@FETCH_STATUS = 0
BEGIN
IF @TheDate>DATEADD(DAY, 90, @Interesting_Date)
BEGIN
INSERT @cd (TheDate, Qualify)
SELECT @TheDate, @Qualify ;
SET @Interesting_Date=@TheDate;
END
FETCH NEXT FROM c INTO @TheDate;
END
CLOSE c;
DEALLOCATE c;
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
La solución de Paul White ( PW )
;WITH CTE AS
(
SELECT TOP (1)
T.[mydate]
FROM dbo.vGetVisits AS T
ORDER BY
T.[mydate]
UNION ALL
SELECT
SQ1.[mydate]
FROM
(
SELECT
T.[mydate],
rn = ROW_NUMBER() OVER (
ORDER BY T.[mydate])
FROM CTE
JOIN dbo.vGetVisits AS T
ON T.[mydate] > DATEADD(DAY, 90, CTE.[mydate])
) AS SQ1
WHERE
SQ1.rn = 1
)
SELECT
CTE.[mydate]
FROM CTE
OPTION (MAXRECURSION 0);
Mi solución DATEADD ( PN, DATEADD )
DECLARE @cd TABLE
( TheDate datetime PRIMARY KEY
);
DECLARE @TheDate DATETIME
SET @TheDate=(SELECT MIN(mydate) as mydate FROM [dbo].[vGetVisits])
WHILE (@TheDate IS NOT NULL)
BEGIN
INSERT @cd (TheDate) SELECT @TheDate;
SET @TheDate=(
SELECT MIN(mydate) as mydate
FROM [dbo].[vGetVisits]
WHERE mydate>DATEADD(DAY, 90, @TheDate)
)
END
SELECT TheDate
FROM @cd
ORDER BY TheDate ;
Mi solución DATEDIFF ( PN, DATEDIFF )
DECLARE @MinDate DATETIME;
SET @MinDate=(SELECT MIN(mydate) FROM dbo.vGetVisits);
;WITH gaps AS
(
SELECT
t1.[date]
, t1.[MaxDiff]
, SUM(t1.[MaxDiff]) OVER (ORDER BY t1.[date]) AS [Running Total]
FROM
(
SELECT
mydate AS [date]
, DATEDIFF(day,LAG(mydate, 1, mydate) OVER (ORDER BY mydate) , mydate) AS [MaxDiff]
FROM
(SELECT
RowNum = ROW_NUMBER() OVER(PARTITION BY cast(mydate as date) ORDER BY mydate)
, mydate
FROM dbo.vGetVisits
) Actions
WHERE RowNum = 1
) t1
)
SELECT [date]
FROM gaps t2
WHERE
( ([Running Total])%91 - ([Running Total]- [MaxDiff])%91 <0 )
OR
( [MaxDiff] > 90)
OR
([date]=@MinDate)
ORDER BY [date]
Estoy usando SQL Server 2012, así que pido disculpas a Mikael Eriksson, pero su código no se probará aquí. Todavía esperaría que sus soluciones con DATADIFF y DATEADD devuelvan valores diferentes en algunos conjuntos de datos.
Y los resultados reales son: