我有一个df,它告诉用户位置。
ID date_start date_end location
-------------------------------------------------------
1 2020-01-30 2020-03-25 Office 2
1 2010-03-26 2020-04-30 Office 1
1 2010-06-01 2010-08-01 Home and so on
2 2010-07-01 2010-09-03 Office 4
4 2010-06-01 2010-07-23 Office 5
4 2010-07-24 2010-07-31 Home
5 2010-07-01 2010-07-23 Office 1
5 2010-07-24 2010-07-31 Office 2我需要找出在一个特定的时间框架内,比如整个2010年7月,那些在任何办公地点而不是在家里的人。怎么做?人们说,这是为了跟踪他们是否真的在办公室工作。
预期结果:
ID date_start date_end location
--------------------------------------------------
2 2010-07-01 2010-09-03 Office 4
5 2010-07-01 2010-07-23 Office 1
5 2010-07-24 2010-07-31 Office 2解释
发布于 2020-08-09 14:09:34
用NOT EXISTS
select t.* from tablename t
where t.date_start < '2010-08-01' and t.date_end >= '2010-07-01'
and not exists (
select 1
from tablename
where id = t.id
and location like '%Home%'
and date_start < '2010-08-01' and date_end >= '2010-07-01'
)见演示。
结果:
> ID | date_start | date_end | location
> -: | :--------- | :--------- | :-------
> 2 | 2010-07-01 | 2010-09-03 | Office 4
> 5 | 2010-07-01 | 2010-07-23 | Office 1
> 5 | 2010-07-24 | 2010-07-31 | Office 2发布于 2020-08-09 13:57:42
通过使用日期逻辑,您可以在2010年7月看到任意一天的重叠。以下是涵盖七月的所有记录:
select l.*
from locations l
where date_end < '2010-08-01' and date_start >= '2010-07-01';其次,合计:
select l.id
from locations l
where date_end < '2010-08-01' and date_start >= '2010-07-01'
group by l.id
having sum(case when status like '%home%' then 1 else 0 end) = 0 and -- not at home
sum(case when status like '%office%' then 1 else 0 end) > 1 -- in an officehaving条款检查他们是否在一个月内根本不在家,并且至少在办公室呆过一次。
编辑:
如果您想要原始行,那么使用not exists
select l.*
from locations l
where l.date_end < '2010-08-01' and l.date_start >= '2010-07-01' and
l.status like '%office%' and
not exists (select 1
from locations l2
where l2.id = l.id and
l2.date_end < '2010-08-01' and l2.date_start >= '2010-07-01' and
l2.status like '%home%'
);发布于 2020-08-10 01:07:40
这种方法使用日期范围tvf (表值函数)来生成每个ID在不同办公地点出现的不同天数。然后使用相同的tvf按日生成比较间隔。然后,它按日加入,其中匹配的计数等于比较间隔中的天数(包括)。日期范围函数来自于以下脚本:https://www.sqlservercentral.com/scripts/a-daterange-table-valued-function
最后,查询与原始表匹配,以选择包含在比较范围中的日期行。
/* tvf to generate date range */
DROP FUNCTION IF EXISTS [dbo].[daterange];
GO
CREATE FUNCTION [dbo].[daterange]
(@startdate DATETIME2,
@enddate DATETIME2,
@datepart NVARCHAR(3) = 'dd',
@interval INT = 1
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
WITH a(a)
AS (SELECT 0
FROM(VALUES(0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0)) a(a)),
b(rownum)
AS (SELECT TOP (ABS(CASE @datepart
WHEN 'ns'
THEN DATEDIFF(ns, @enddate, @startdate) / @interval
WHEN 'mcs'
THEN DATEDIFF(mcs, @enddate, @startdate) / @interval
WHEN 'ms'
THEN DATEDIFF(ms, @enddate, @startdate) / @interval
WHEN 'ss'
THEN DATEDIFF(ss, @enddate, @startdate) / @interval
WHEN 'mi'
THEN DATEDIFF(mi, @enddate, @startdate) / @interval
WHEN 'hh'
THEN DATEDIFF(hh, @enddate, @startdate) / @interval
WHEN 'dd'
THEN DATEDIFF(dd, @enddate, @startdate) / @interval
WHEN 'ww'
THEN DATEDIFF(ww, @enddate, @startdate) / @interval
WHEN 'mm'
THEN DATEDIFF(mm, @enddate, @startdate) / @interval
WHEN 'qq'
THEN DATEDIFF(qq, @enddate, @startdate) / @interval
WHEN 'yy'
THEN DATEDIFF(yy, @enddate, @startdate) / @interval
ELSE DATEDIFF(dd, IIF(@startdate < @enddate, @startdate, @enddate), IIF(@startdate < @enddate, @enddate, @startdate)) / @interval
END) + 1) ROW_NUMBER() OVER(
ORDER BY
(
SELECT NULL
)) - 1
FROM a a,
a b,
a c,
a d,
a e,
a f,
a g,
a h) -- a maximum of 16^8 (or 2^32) rows could be returned from this inline tally
SELECT CASE @datepart
WHEN 'ns'
THEN DATEADD(ns, t.addamount, @startdate)
WHEN 'mcs'
THEN DATEADD(mcs, t.addamount, @startdate)
WHEN 'ms'
THEN DATEADD(ms, t.addamount, @startdate)
WHEN 'ss'
THEN DATEADD(ss, t.addamount, @startdate)
WHEN 'mi'
THEN DATEADD(mi, t.addamount, @startdate)
WHEN 'hh'
THEN DATEADD(hh, t.addamount, @startdate)
WHEN 'dd'
THEN DATEADD(dd, t.addamount, @startdate)
WHEN 'ww'
THEN DATEADD(ww, t.addamount, @startdate)
WHEN 'mm'
THEN DATEADD(mm, t.addamount, @startdate)
WHEN 'qq'
THEN DATEADD(qq, t.addamount, @startdate)
WHEN 'yy'
THEN DATEADD(yy, t.addamount, @startdate)
ELSE DATEADD(dd, t.addamount, @startdate)
END [value]
FROM b
CROSS APPLY(VALUES(IIF(@startdate < @enddate, @interval * rownum, @interval * -rownum))) t(addamount);
GO
/* sample data */
DROP TABLE IF EXISTS #example_df;
GO
CREATE TABLE #example_df
(id INT NOT NULL,
date_start DATE NOT NULL,
date_end DATE NOT NULL,
location VARCHAR(30)
);
GO
INSERT INTO #example_df
(id,
date_start,
date_end,
location
)
VALUES
(1,
'2010-01-30',
'2010-03-25',
'Office 2'
),
(1,
'2010-03-26',
'2010-04-30',
'Office 1'
),
(1,
'2010-06-01',
'2010-08-01',
'Home and so on'
),
(2,
'2010-07-01',
'2010-09-03',
'Office 4'
),
(4,
'2010-06-01',
'2010-07-23',
'Office 5'
),
(4,
'2010-07-24',
'2010-07-31',
'Home'
),
(5,
'2010-07-01',
'2010-07-23',
'Office 1'
),
(5,
'2010-07-24',
'2010-07-31',
'Office 2'
);
/* comparison date range */
DECLARE @start_dt DATE= '2010-07-01', @end_dt DATE= '2010-07-31';
/* final query */
WITH office_dt_cte(id,
range_dt)
AS (SELECT DISTINCT
df.id,
CAST(dr.[value] AS DATE)
FROM #example_df df
CROSS APPLY dbo.daterange(df.date_start, df.date_end, 'dd', 1) dr
WHERE df.location LIKE 'Office%'),
compare_dt_cte(range_dt)
AS (SELECT CAST(dr.[value] AS DATE)
FROM dbo.daterange(@start_dt, @end_dt, 'dd', 1) dr),
matches_cte(id)
AS (SELECT id
FROM office_dt_cte odc
JOIN compare_dt_cte cdc ON odc.range_dt = cdc.range_dt
GROUP BY id
HAVING COUNT(*) = DATEDIFF(day, @start_dt, @end_dt) + 1)
SELECT ed.*
FROM #example_df ed
JOIN matches_cte mc ON ed.id = mc.id
WHERE @start_dt BETWEEN ed.date_start AND ed.date_end
OR @end_dt BETWEEN ed.date_start AND ed.date_end;https://stackoverflow.com/questions/63327151
复制相似问题