1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
| SET STATISTICS IO ON
GO
DECLARE @ITEMS NVARCHAR(MAX)
SET @ITEMS = N'25201,25198,25195,25192,25189,25186,25183,25180,25177,25174,25171,25168,25165,25162,25159,25156,25153,25150,25147,25144,25141,25138,25135,25132,25129,25126,25123,25120,25117,25114,25111,25108,25105,25102,25099,25096,25093,25090,25087,25084,25081,25078,25075,25072,25069,25066,25063,25060,25057,25054,25051,25048,25045,25042,25039,25036,25033,25030,25027,25024,25021,25018,25015,25012,25009,25006,25003,25000,24997,24994,24991,24988,24985,24982,24979,24976,24973,24970,24967,24964,24961,24958,24955,24952,24949,24946,24943,24940,24937,24934,24931,24928,24925,24922,24919,24916,24913,24910,24907,24904,24901,24898,24895,24892,24889,24886,24883,24880,24877,24874,24871,24868,24865,24862,24859,24856,24853,24850,24847,24844,24841,24838,24835,24832,24829,24826,24823,24820,24811,24808,24805,24799,24796,24790,24787,24784,24781,24763,24742,24736,24733,24730,24718,24715,24703,24700,24694,24685,24682,24679,24670,24667,24664,24661,24652,24646,24637,24631,24628,24622,24616,24613,24610,24607,24604,24601,24598,24592,24589,24586,24583,24580,24577,24571,24565,24562,24553,24550,24541,24535,24529,24523,24520,24517,24511,24508,24484,24481,24478,24472,24463,24457,24454,24436,24433,24430,24424,24415,24412,24409,24403,24397,24376,24370,24352,24349,24346,24343,24340,24337,24334,24331,24328,24325,24322,24319,24316,24313,24310,24307,24304,24301,24298,24295,24292,24289,24286,24283,24280,24277,24274,24271,24268,24265,24262,24259,24256,24253,24250,24247,24244,24241,24238,24235,24232,24229,24226,24223,24220,24217,24214,24211,24208,24205,24202,24199,24196,24193,24190,23485,23482,23479,23470,23467,22816,22807,22801,22795,22789,22786,22780,22774,22768,22762,22756,22750,22744,22732,22726,22714,22708,22705,22699,22585,22582,22579,22576,22573,22570,22567,22564,22561,22558,22555,22522,22519,22498,22495,22492,22489,22486,22483,22480,22477,22474,22471,22468,22465,22462,22459,22456,22453,22411,22408,22405,22402,22399,22396,22393,22390,22387,22384,22381,22378,22375,22372,22369,22366,22363,22360,22357,22354,22351,22348,22345,22342,22339,22336,22333,22330,22327,22324,22321,22318,22315,22312,22309,22267,22264,22261,22258,22207,22204,22201,22198,22195,22192,22189,22186,22183,22180,22177,22174,22171,22168,22165,22162,22159,22156,22153,22150,22147,22144,22141,22138,22135,22039,22036,22033,22030,22018,21979,21976,21973,21970,21967,21940,21937,21934,21931,21928,21925,21922,21784,21781,21775,21772,21769,21766,21763,21760,21757,21754,21751,21748,21745,21742,21739,21736,21733,21730,21727,21724,21721,21718,21715,21712,21709,21706,21703,21700,21697,21694,21691,21688,21685,21682,21679,21676,21673,21670,21667,21664,21661,21658,21655,21652,21649,21646,21643,21640,21637,21634,21631,21628,21625,21622,21619,21616,21613,21610,21607,21604,21601,21598,21592,21586,21580,21574,21571,21568,21565,21562,21559,21556,21553,21124,21118,21109,21103,20113,20110,20107,20104,20101,20098,20095,20092,20089,20008,20005,20002,19999,19996,19993,19990,19987,19984,19981,19978,19975,19972,19969,19966,19963,19960,19957,19954,19951,19948,19945,19942,19939,19936,19933,19930,19927,19924,19921,19918,19915,19912,19909,19906,19903,19900,19897,19894,19891,19888,19885,19882,19879,19876,19873,19807,19804,19801,19798,19795,19792,19789,19786,19783,19780,19777,19774,19771,19768,19765,19762,19759,19756,19753,19750,19747,19744,19741,19738,19735,19732,19729,19726,19723,19720,19717,19714,19711,19105,19102,19099,19096,19093,19090,19087,19084,19081,19078,19075,19072,19069,19066,19063,19060,19057,19054,19051,19048,19045,19042,19039,19036,19033,19030,19027,19024,19021,19018,19015,19012,19009,19006,19003,19000,18997,18994,18991,18988,18985,18982,18979,18976,18973,18970,18967,18964,18961,18958,18955,18952,18949,18946,18943,18940,18937,18934,18931,18928,18925,18922,18919,18916,18913,18910,18907,18904,18901,18898,18895,18892,18889,18886,18883,18880,18877,18874,18871,18868,18865,18862,18859,18856,18853,18850,18847,18844,18841,18838,18835,18832,18829,18826,18823,18820,18817,18814,18811,18808,18805,18802,18799,18796,18793,18790,18787,18784,18781,18778,18775,18772,18769,18766,18763,18760,18757,18754,18751,18748,18745,18742,18739,18736,18733,18730,18727,18724,18721,18718,18715,18712,18709,18706,18703,18700,18697,18694,18691,18688,18685,18682,18679,18631,18628,18625,18622,18619,18616,18613,18610,18607,18604,18601,18598,18595,18592,18589,18586,18583,18580,18577,18574,18571,18568,18565,18562,18559,18556,18553,18550,18547,18544,18541,18538,18535,18532,18529,18526,18523,18520,18517,18514,18511,18508,18505,18502,18499,18496,18493,18490,18487,18484,18481,18478,18475,18472,18469,18466,18463,18460,18457,18454,18451,18448,18445,18442,18439,18436,18433,18430,18427,18424,18421,18418,18415,18412,18409,18406,18403,18400,18397,18394,18391,18388,18385,18382,18379,18376,18373,18370,18367,18364,18361,18358,18355,18352,18349,18346,18343,18340,18337,18334,18331,18328,18325,18322,18319,18316,18313,18310,18307,18304,18301,18298,18295,18292,18289,18286,18283,18280,18277,18274,18271,18268,18265,18262,18259,18256,18253,18250,18247,18241,18238,18235,18232,18226,18223,18220,18217,18208,18205,18202,18199,18157,18151,18148,18145,18142,17539,17536,17533,17530,17527,17524,17521,17518,17515,17512,17509,17506,17503,17500,17497,17494,17491,17410,17407,17404,17401,17398,17395,17392,17389,17386,17383,17380,17377,17374,17371,17368,17365,17362,16996,16993,16990,16987,16984,16981,16633,16267,16264,16261,16258,16255,16252,16249,16246,16243,16240,16237,16234,16231,16228,16225,16222,16219,16216,16213,16210,16207,16204,16201,16198,16195,16192,16189,16186,16183,16180,16177,16174,16171,15853,15850,415095,415092,415089,415086,415083,415080,415077,415074,415071,415068,415065,415062,415059,415056,415053,415050,415047,415044,415041,415038,415035,415032,415029,415026,415023,415020,415017,415014,415011,415008,415005,415002,414999,414996,414993,414990,414987,414984,414981,414978,414975,414972,414969,414966,414963,414960,414957,414954,414951,414948,414945,414942,414939,414936,414933,414930,414927,414924,414921,414918,414915,414912,414909,414906,414903,414900,414897,414894,414891,414888,414885,414882,414879,414876,414873,414870,414867'
DECLARE @sep NCHAR
SET @sep = N';'
DECLARE @SOURCE SYSNAME
SET @SOURCE = 'enron_3'
CREATE TABLE #temp
([id] INT,
[sent] DATETIME,
[md5] NVARCHAR(45),
[node_id] INT,
[location] NVARCHAR(300),
[subject] NVARCHAR(MAX),
[sender] NVARCHAR(200),
[recipients] NVARCHAR(MAX),
[CC] NVARCHAR(MAX),
[BCC] NVARCHAR(MAX)
)
CREATE NONCLUSTERED INDEX IX_temp_table
ON #temp(md5, node_id)
SELECT element into #E FROM dbo.SplitString(@ITEMS,',')
CREATE CLUSTERED INDEX IX_C_Element ON #E ( element )
INSERT INTO #temp SELECT Items.[ItemID] AS id, Items.[EmailDate] AS sent, Items.[MD5Hash] as md5, Items.[ParentID] AS node_id, Items.[ItemFilePath] AS location, Items.[EmailSubject] AS subject, Items.[EmailSender] AS sender, Items.[EmailRecipients] AS recipients, Items.[EmailCCRecipients] AS CC, Items.[EmailBCCRecipients] AS BCC
FROM [enron_3].[dbo].[Items] AS Items
RIGHT JOIN #E AS Split
ON Items.ItemID = Split.element;
WITH QueryB AS (
SELECT [md5], [sent], [location], [subject],
rn = row_number() OVER (PARTITION BY md5 ORDER BY sent)
FROM #temp
)
--fill hits with distinct items.
INSERT INTO dbo.[hits]
SELECT [md5], [sent], [location], [subject]
FROM QueryB q
WHERE [rn] = 1
AND NOT EXISTS (SELECT * FROM dbo.[hits] h WHERE h.md5 = q.md5)
--fill hits_nodes with every node that contains a specific md5
INSERT INTO dbo.[hits_nodes]
SELECT DISTINCT t.[md5], t.[node_id]
FROM #temp t
WHERE t.[md5] IS NOT NULL
AND t.[node_id] IS NOT NULL
AND NOT EXISTS (SELECT * FROM dbo.hits_nodes h WHERE h.md5 = t.md5 )
-- Insert sender addresses
INSERT INTO dbo.email_addresses (email)
SELECT dbo.Trim(t.[Sender])
FROM #temp AS t
WHERE NOT EXISTS (SELECT * FROM email_addresses WHERE email = dbo.Trim(t.[Sender]))
GROUP BY dbo.Trim(t.sender)
-- make sender relations
INSERT INTO dbo.hits_addresses (md5, email_address_id, relation)
SELECT t.[md5], e.[id], 1
FROM #temp AS t
INNER JOIN email_addresses AS e
ON t.sender = e.email
WHERE NOT EXISTS (SELECT * FROM hits_addresses h WHERE h.email_address_id = e.id AND h.relation = 1)
GROUP BY t.[md5], e.[id]
--insert all distinct recipients into email_addresses
INSERT INTO dbo.email_addresses (email)
SELECT dbo.Trim(SUBSTRING(@sep + t.[recipients] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[recipients] + @sep, n.number+1) - n.number - 1)) AS [Value]
FROM dbo.[numbers] n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep + t.[recipients] + @sep)
AND SUBSTRING(@sep + t.[recipients] + @sep, n.number, 1) = @sep
AND NOT dbo.Trim(SUBSTRING(@sep + t.[recipients] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[recipients] + @sep, n.number+1) - n.number - 1)) = ''
AND dbo.Trim(SUBSTRING(@sep + t.[recipients] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[recipients] + @sep, n.number+1) - n.number - 1)) LIKE '%@%.%'
AND NOT EXISTS (SELECT * FROM email_addresses WHERE email = dbo.Trim(SUBSTRING(@sep + t.[recipients] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[recipients] + @sep, n.number+1) - n.number - 1)))
GROUP BY dbo.Trim(SUBSTRING(@sep + t.[recipients] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[recipients] + @sep, n.number+1) - n.number - 1))
--insert all distinct CC into email_addresses
INSERT INTO dbo.email_addresses (email)
SELECT dbo.Trim(SUBSTRING(@sep + t.[CC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[CC] + @sep, n.number+1) - n.number - 1)) AS [Value]
FROM dbo.[numbers] n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep + t.[CC] + @sep)
AND SUBSTRING(@sep + t.[CC] + @sep, n.number, 1) = @sep
AND NOT dbo.Trim(SUBSTRING(@sep + t.[CC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[CC] + @sep, n.number+1) - n.number - 1)) = ''
AND dbo.Trim(SUBSTRING(@sep + t.[CC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[CC] + @sep, n.number+1) - n.number - 1)) LIKE '%@%.%'
AND NOT EXISTS (SELECT * FROM email_addresses WHERE email = dbo.Trim(SUBSTRING(@sep + t.[CC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[CC] + @sep, n.number+1) - n.number - 1)))
GROUP BY dbo.Trim(SUBSTRING(@sep + t.[CC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[CC] + @sep, n.number+1) - n.number - 1))
--insert all distinct BCC into email_addresses
INSERT INTO dbo.email_addresses (email)
SELECT dbo.Trim(SUBSTRING(@sep + t.[BCC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[BCC] + @sep, n.number+1) - n.number - 1)) AS [Value]
FROM dbo.[numbers] n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep + t.[BCC] + @sep)
AND SUBSTRING(@sep + t.[BCC] + @sep, n.number, 1) = @sep
AND NOT dbo.Trim(SUBSTRING(@sep + t.[BCC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[BCC] + @sep, n.number+1) - n.number - 1)) = ''
AND dbo.Trim(SUBSTRING(@sep + t.[BCC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[BCC] + @sep, n.number+1) - n.number - 1)) LIKE '%@%.%'
AND NOT EXISTS (SELECT * FROM email_addresses WHERE email = dbo.Trim(SUBSTRING(@sep + t.[BCC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[BCC] + @sep, n.number+1) - n.number - 1)))
GROUP BY dbo.Trim(SUBSTRING(@sep + t.[BCC] + @sep, n.number+1, CHARINDEX(@sep, @sep + t.[BCC] + @sep, n.number+1) - n.number - 1))
INSERT INTO dbo.hits_addresses (md5, email_address_id, relation)
SELECT X.MD5, E.ID, X.Relation FROM
(
SELECT t.md5 as MD5, dbo.Trim(SUBSTRING(@sep+t.[recipients]+@sep,n.number+1,CHARINDEX(@sep,@sep+t.[recipients]+@sep,n.number+1)-n.number-1)) AS Email, 2 as Relation
FROM dbo.numbers n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep+t.[recipients]+@sep)
AND SUBSTRING(@sep+t.[recipients]+@sep,n.number,1) = @sep
UNION ALL
SELECT t.md5 as MD5, dbo.Trim(SUBSTRING(@sep+t.[CC]+@sep,n.number+1,CHARINDEX(@sep,@sep+t.[CC]+@sep,n.number+1)-n.number-1)) AS Email, 3 as Relation
FROM dbo.numbers n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep+t.[CC]+@sep)
AND SUBSTRING(@sep+t.[CC]+@sep,n.number,1) = @sep
UNION ALL
SELECT t.md5 as MD5, dbo.Trim(SUBSTRING(@sep+t.[BCC]+@sep,n.number+1,CHARINDEX(@sep,@sep+t.[BCC]+@sep,n.number+1)-n.number-1)) AS Email, 4 as Relation
FROM dbo.numbers n
CROSS JOIN #temp t
WHERE n.number < LEN(@sep+t.[BCC]+@sep)
AND SUBSTRING(@sep+t.[BCC]+@sep,n.number,1) = @sep
) X INNER JOIN email_addresses e
ON X.email = e.email
GROUP BY X.md5, e.id, X.Relation
--inserts all custodians where message is found with relation to message into hits_addresses
INSERT INTO dbo.hits_addresses (md5, email_address_id, relation)
SELECT t.md5, c.id, 5
FROM #temp t
INNER JOIN dbo.hits_nodes h
ON t.md5 = h.md5
INNER JOIN dbo.nodes n
ON h.node_id = n.id
INNER JOIN dbo.discoveryjobs d
ON n.discoveryjob_id = d.id
INNER JOIN dbo.custodians c
ON d.custodian_id = c.id
WHERE NOT EXISTS (SELECT * FROM hits_addresses WHERE md5 = t.md5 AND email_address_id = c.id)
DROP TABLE #E
DROP TABLE #temp |