|
我有一张 road_events 表: |
-- Sample schema and data for the road_events example (Oracle syntax).
-- Each row is one event on a road, covering the stretch from_meas..to_meas.
create table road_events (
    event_id          number,
    road_id           number,
    year              number,
    from_meas         number(10,2),
    to_meas           number(10,2),
    total_road_length number
);

-- NOTE(review): in this copy of the post the value lists for events 2, 5, 10,
-- 11 and 12 are truncated or missing (only the trailing "100);" of event 2
-- survives). The rows marked "reconstructed" below are a best-effort
-- restoration -- verify them against the original data before relying on them.

-- road 1
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (1, 1, 2020, 25, 50, 100);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (2, 1, 2000, 25, 50, 100);  -- reconstructed
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (3, 1, 1980, 0, 25, 100);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (4, 1, 1960, 75, 100, 100);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (5, 1, 1940, 1, 100, 100);  -- reconstructed

-- road 2
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (6, 2, 2000, 10, 30, 100);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (7, 2, 1975, 30, 60, 100);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (8, 2, 1950, 50, 90, 100);

-- road 3
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (9, 3, 2050, 40, 90, 100);

-- road 4
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (10, 4, 2040, 0, 200, 200);  -- reconstructed
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (11, 4, 2013, 0, 199, 200);  -- reconstructed
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (12, 4, 2001, 0, 200, 200);  -- reconstructed

-- road 5
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (13, 5, 1985, 50, 70, 300);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (14, 5, 1985, 10, 50, 300);
insert into road_events (event_id, road_id, year, from_meas, to_meas, total_road_length) values (15, 5, 1965, 1, 301, 300);

commit;

select * from road_events;

-- Column header of the query output as shown in the post
-- (the result rows are not preserved in this copy):
--   EVENT_ID    ROAD_ID       YEAR  FROM_MEAS    TO_MEAS TOTAL_ROAD_LENGTH
-- ---------- ---------- ---------- ---------- ---------- -----------------

我想选择代表每条路上最新工作的事件。
因为事件通常只涉及道路的一部分,这意味着我不能简单地选择每条路的最新事件;我只需要选择最新的、互不重叠的事件里程。
可能的逻辑(按顺序):
我不想去猜测该如何解决这个问题,因为这样做最终可能弊大于利(类似于 XY 问题)。另一方面,它可以帮助理解问题的本质,所以我还是把它列出来:
1. 选择每条道路的最新事件。我们将这个最新的事件称为 event A。
2. 如果 event A 的范围 >= total_road_length,这就是我所需要的全部,算法到此结束。
3. 否则,按时间顺序取下一个事件(event B),其范围与 event A 的范围不相同。
4. 如果 event B 的范围与 event A 的范围重叠,则只取 event B 中不重叠的部分。
5. 重复步骤 3 和 4,直到事件总长度 = total_road_length;或者,当该道路上没有更多事件时停止。
问题:
我知道这是一项艰巨的任务,但要怎样才能做到这一点?
这是一个经典的线性参考(linear referencing)问题。如果我能把线性参考操作作为查询的一部分来完成,会非常有帮助。
结果应该是:
$ h+ v6 n4 _1 y: f' X+ { EVENT_ID ROAD_ID YEAR TOTAL_ROAD_LENGTH EVENT_LENGTH---------- ---------- ---------- ----------------- ------------ 5 300 # r7 C1 @& g ^, Y. v+ n1 B3 }" {6 `% q
解决方案:
我的主要 DBMS 是 Teradata,但这个方案在 Oracle 中同样适用。
-- For every road, split its length into elementary ranges at each event
-- boundary, keep the most recent event covering each range, and total the
-- covered length per event.
WITH all_meas AS
 ( -- get a distinct list of all from/to points (UNION removes duplicates)
   SELECT road_id, from_meas AS meas
   FROM road_events
   UNION
   SELECT road_id, to_meas
   FROM road_events
 )
-- select * from all_meas order by 1,2
,all_ranges AS
 ( -- create from/to ranges: each point paired with the next point on the road
   -- (the last point of a road gets a NULL to_meas)
   SELECT road_id
     ,meas AS from_meas
     ,Lead(meas)
      Over (PARTITION BY road_id
            ORDER BY meas) AS to_meas
   FROM all_meas
 )
-- SELECT * from all_ranges order by 1,2
,all_event_ranges AS
 ( -- now match the ranges to the event ranges
   SELECT
      ar.*
     ,re.event_id
     ,re.year
     ,re.total_road_length
     ,ar.to_meas - ar.from_meas AS event_length
     -- used to filter the latest event as multiple events might cover the same range;
     -- event_id is an arbitrary tie-breaker so that same-year events covering
     -- the same range resolve deterministically
     ,Row_Number()
      Over (PARTITION BY ar.road_id, ar.from_meas
            ORDER BY re.year DESC, re.event_id DESC) AS rn
   FROM all_ranges ar
   JOIN road_events re
     ON ar.road_id = re.road_id
    -- NOTE(review): the comparison operators between ar.from_meas and
    -- re.from_meas were lost in the source text; restored here as the usual
    -- interval-overlap test -- confirm against the original answer.
    AND ar.from_meas < re.to_meas
    AND ar.to_meas > re.from_meas
   WHERE ar.to_meas IS NOT NULL  -- drop the open-ended row after the last point
 )
SELECT event_id, road_id, year, total_road_length, Sum(event_length)
FROM all_event_ranges
WHERE rn = 1 -- latest year only
GROUP BY event_id, road_id, year, total_road_length
ORDER BY road_id, year DESC;

如果您需要返回实际承保范围from/to_meas(如编辑之前的问题所示),可能会更复杂。第一部分是一样的,但是查询可以在不聚合的情况下返回相同的event_id相邻行(例如,事件3:0-1和1-25):
-- Final SELECT to use with the all_event_ranges CTE of the preceding query in
-- place of the aggregating one: returns one row per elementary range, keeping
-- only the top-ranked (rn = 1) event for each range.
SELECT *
FROM all_event_ranges
WHERE rn = 1
ORDER BY road_id, from_meas;

要合并相邻行,还需要两个步骤(使用标准方法,标记组第一行并计算组号):
-- Same range-splitting as the aggregate query, but returns the actual covered
-- from/to ranges per event, merging adjacent elementary ranges that belong to
-- the same event into a single row.
WITH all_meas AS
 ( -- distinct list of all from/to points (UNION removes duplicates)
   SELECT road_id, from_meas AS meas
   FROM road_events
   UNION
   SELECT road_id, to_meas
   FROM road_events
 )
-- select * from all_meas order by 1,2
,all_ranges AS
 ( -- each point paired with the next point on the same road
   -- (the last point of a road gets a NULL to_meas)
   SELECT road_id
     ,meas AS from_meas
     ,Lead(meas)
      Over (PARTITION BY road_id
            ORDER BY meas) AS to_meas
   FROM all_meas
 )
-- SELECT * from all_ranges order by 1,2
,all_event_ranges AS
 ( -- match the elementary ranges to the events that cover them
   SELECT
      ar.*
     ,re.event_id
     ,re.year
     ,re.total_road_length
     ,ar.to_meas - ar.from_meas AS event_length
     -- rn = 1 marks the latest event covering a range; event_id is an arbitrary
     -- tie-breaker so that same-year events resolve deterministically
     ,Row_Number()
      Over (PARTITION BY ar.road_id, ar.from_meas
            ORDER BY re.year DESC, re.event_id DESC) AS rn
   FROM all_ranges ar
   JOIN road_events re
     ON ar.road_id = re.road_id
    -- NOTE(review): the comparison operators between ar.from_meas and
    -- re.from_meas were lost in the source text; restored here as the usual
    -- interval-overlap test -- confirm against the original answer.
    AND ar.from_meas < re.to_meas
    AND ar.to_meas > re.from_meas
   WHERE ar.to_meas IS NOT NULL  -- drop the open-ended row after the last point
 )
-- SELECT * FROM all_event_ranges WHERE rn = 1 ORDER BY road_id, from_meas
,adjacent_events AS
 ( -- assign 1 to the 1st row of an event, 0 to rows continuing the same event
   -- NOTE(review): the THEN value was missing in the source text; 0 is
   -- required for the cumulative sum below to number the groups.
   SELECT t.*
     ,CASE WHEN Lag(event_id)
                Over (PARTITION BY road_id
                      ORDER BY from_meas) = event_id
           THEN 0
           ELSE 1
      END AS flag
   FROM all_event_ranges t
   WHERE rn = 1
 )
-- SELECT * FROM adjacent_events ORDER BY road_id, from_meas
,grouped_events AS
 ( -- assign a groupnumber to adjacent rows using a Cumulative Sum over 0/1
   SELECT t.*
     ,Sum(flag)
      Over (PARTITION BY road_id
            ORDER BY from_meas
            ROWS Unbounded Preceding) AS grp
   FROM adjacent_events t
 )
-- SELECT * FROM grouped_events ORDER BY road_id, from_meas
SELECT event_id, road_id, year, Min(from_meas), Max(to_meas), total_road_length, Sum(event_length)
FROM grouped_events
GROUP BY event_id, road_id, grp, year, total_road_length
ORDER BY road_id, Min(from_meas);
|