以下简化的实体模型可以正常地以批量/基于集合(set-based)的方式,把 #BulkData 中的非规范化数据插入到各个规范化表中
(欢迎提出改进建议)——主题:基于集合(set-based)的批量导入
-- Start from a clean slate: drop any temp table left over from an earlier run
-- in this session so the CREATE TABLE statements further down do not collide.
IF OBJECT_ID('tempdb..#Things') IS NOT NULL
BEGIN
    DROP TABLE #Things;
END;

IF OBJECT_ID('tempdb..#Categories') IS NOT NULL
BEGIN
    DROP TABLE #Categories;
END;

IF OBJECT_ID('tempdb..#ThingsToCategories') IS NOT NULL
BEGIN
    DROP TABLE #ThingsToCategories;
END;

IF OBJECT_ID('tempdb..#BulkData') IS NOT NULL
BEGIN
    DROP TABLE #BulkData;
END;
-- Normalised target: one row per thing, keyed by a generated surrogate id.
CREATE TABLE #Things
(
    ThingId   INT IDENTITY(1, 1) PRIMARY KEY,
    ThingName NVARCHAR(255)
);

-- Normalised target: one row per category, keyed by a generated surrogate id.
CREATE TABLE #Categories
(
    CategoryId   INT IDENTITY(1, 1) PRIMARY KEY,
    CategoryName NVARCHAR(255)
);

-- Link table pairing a ThingId with a CategoryId.
CREATE TABLE #ThingsToCategories
(
    ThingId    INT,
    CategoryId INT
);

-- Denormalised staging table: each row carries a thing name and a category name.
CREATE TABLE #BulkData
(
    ThingName    NVARCHAR(255),
    CategoryName NVARCHAR(255)
);
-- Sample denormalised input, one row per (thing, category) pairing.
-- The following would be done from a flat file via a bulk import.
-- The column list is explicit so the insert does not depend on the column
-- order of #BulkData; UNION ALL is used because the literal rows are already
-- distinct, so UNION's implicit de-duplication would be wasted work.
INSERT INTO #BulkData (ThingName, CategoryName)
SELECT N'Thing1', N'Category1'
UNION ALL
SELECT N'Thing2', N'Category1'
UNION ALL
SELECT N'Thing3', N'Category2';
-- Add every category name present in the staging data that #Categories does
-- not hold yet.
-- NOT EXISTS replaces NOT IN: CategoryName is nullable, and a single NULL in
-- #Categories makes "x NOT IN (...)" evaluate to UNKNOWN for every row, which
-- would silently stop all further inserts.
-- NULL names are skipped because the later equality join on CategoryName can
-- never match them, so they would only create orphan category rows.
INSERT INTO #Categories (CategoryName)
SELECT DISTINCT b.CategoryName
FROM #BulkData AS b
WHERE b.CategoryName IS NOT NULL
  AND NOT EXISTS (
        SELECT 1
        FROM #Categories AS c
        WHERE c.CategoryName = b.CategoryName
      );
-- Add every thing name present in the staging data that #Things does not hold
-- yet.
-- NOT EXISTS replaces NOT IN: ThingName is nullable, and a single NULL in
-- #Things makes "x NOT IN (...)" evaluate to UNKNOWN for every row, which
-- would silently stop all further inserts.
-- NULL names are skipped because the later equality join on ThingName can
-- never match them, so they would only create orphan thing rows.
INSERT INTO #Things (ThingName)
SELECT DISTINCT b.ThingName
FROM #BulkData AS b
WHERE b.ThingName IS NOT NULL
  AND NOT EXISTS (
        SELECT 1
        FROM #Things AS t
        WHERE t.ThingName = b.ThingName
      );
-- Resolve each staging row to its surrogate keys and record the pairing.
-- DISTINCT collapses repeated (thing, category) rows in the staging data, and
-- the NOT EXISTS guard skips pairings that are already linked, so this step
-- does not add duplicate links, in line with the guarded inserts into
-- #Categories and #Things.
INSERT INTO #ThingsToCategories (ThingId, CategoryId)
SELECT DISTINCT
    t.ThingId,
    c.CategoryId
FROM #BulkData AS b
INNER JOIN #Things AS t
    ON t.ThingName = b.ThingName
INNER JOIN #Categories AS c
    ON c.CategoryName = b.CategoryName
WHERE NOT EXISTS (
        SELECT 1
        FROM #ThingsToCategories AS tc
        WHERE tc.ThingId = t.ThingId
          AND tc.CategoryId = c.CategoryId
      );
-- Display the contents of the three normalised tables, one result set each.
SELECT
    CategoryId,
    CategoryName
FROM #Categories;

SELECT
    ThingId,
    ThingName
FROM #Things;

SELECT
    ThingId,
    CategoryId
FROM #ThingsToCategories;
上述做法存在一个问题:#Things
中的数据在对应的数据插入到 #ThingsToCategories
之前就已经可以被访问。
我能否把上述语句包装在一个事务(?)中,使 #Things 只有在整个批量导入完成之后才可用?
像这样:
-- Run the whole import as one all-or-nothing unit of work.
-- XACT_ABORT makes run-time errors doom the transaction instead of leaving it
-- half applied; note that the setting stays in effect for the session.
SET XACT_ABORT ON;

BEGIN TRY
    BEGIN TRANSACTION X;
    -- insert into all normalised tables
    COMMIT TRANSACTION X;
END TRY
BEGIN CATCH
    -- Undo everything if any statement failed. ROLLBACK is issued without a
    -- name so that it also works when this batch runs inside an outer
    -- transaction.
    IF XACT_STATE() <> 0
        ROLLBACK TRANSACTION;

    -- Re-raise the original error text so the caller still sees the failure.
    -- The message is passed as an argument to '%s' so that any '%' characters
    -- it contains are not interpreted as format specifiers.
    DECLARE @ErrorMessage NVARCHAR(2048);
    SET @ErrorMessage = ERROR_MESSAGE();
    RAISERROR(N'%s', 16, 1, @ErrorMessage);
END CATCH;
不过,对于几百万条记录,这种做法还行得通吗?
我想也许还可以降低日志记录级别?
为什么它在后续查询执行之前“可访问”这一点很重要? –
请把 Thing 看作“聚合根”。如果在 #ThingsToCategories 填充之前就检索到某个 Thing,那么客户端看到的内容可能无法反映真实情况(希望这说得通)。 – cs0815
但是 # 临时表只能在运行该查询的会话中访问,而且这些语句是按顺序执行的,不是吗? –