我试图根据用户来聚合大约200万行。一个用户有几个事务,每个事务都有一个平台和一个TransactionType将Platform和TransactionType列聚合为json,并保存为单行。
但我的代码很慢。我怎样才能提高成绩呢?
public static void AggregateTransactions()
{
using (var db = new ApplicationDbContext())
{
db.ChangeTracker.AutoDetectChangesEnabled = false;
//Get a list of users who have transactions
var users = db.Transactions
.Select(x => x.User)
.Distinct();
foreach (var user in users.ToList())
{
//Get all transactions for a particular user
var _transactions = db.Transactions
.Include(x => x.Platform)
.Include(x => x.TransactionType)
.Where(x => x.User == user)
.ToList();
//Aggregate Platforms from all transactions for user
Dictionary<string, int> platforms = new Dictionary<string, int>();
foreach (var item in _transactions.Select(x => x.Platform).GroupBy(x => x.Name).ToList())
{
platforms.Add(item.Key, item.Count());
};
//Aggregate TransactionTypes from all transactions for user
Dictionary<string, int> transactionTypes = new Dictionary<string, int>();
foreach (var item in _transactions.Select(x => x.TransactionType).GroupBy(x => x.Name).ToList())
{
transactionTypes.Add(item.Key, item.Count());
};
db.Add<TransactionByDay>(new TransactionByDay
{
User = user,
Platforms = platforms, //The dictionary list is represented as json in table
TransactionTypes = transactionTypes //The dictionary list is represented as json in table
});
db.SaveChanges();
}
}
}更新
因此,数据的基本视图如下所示:
服务数据:
Id: b11c6b67-6c74-4培-f 712-08d609 f20cf,UserId: 1,PlatformId: 3,TransactionypeId: 1
Id: 4782803f-2f6b-4d99-f 717-08d609 af20cf,UserId: 1,PlatformId: 3,TransactionypeId: 4
聚合数据为TransactionPerDay:
Id: 9df41ef2-2fc8-441b-4a2f-08d609e21559,UserId: 1,平台:{"p3":2},TransactionsTypes:{"t1":1,"t4":1}
因此,在这种情况下,两个事务被聚合为一个。您可以看到平台和事务类型将聚合为json。
发布于 2018-08-24 18:13:48
您可能不应该在循环中调用db.saveChanges()。将其置于循环之外,将更改保持一次,可能会有所帮助。
尽管如此,在处理大量数据和性能时,我发现ADO.NET可能是一个更好的选择。这并不意味着您必须停止使用实体框架,但对于这种方法,您可以使用ADO.NET。
发布于 2018-08-24 22:27:56
LinqToSQL不是为速度而构建的(LinqToSQL更容易,速度更快,或者可以使用Linq \ SQL运行直接的SQL命令)。不管怎么说,我不知道这会有多快:
using (var db = new MyContext(connectionstring))
{
var tbd = (from t in db.Transactions
group t by t.User
into g
let platforms = g.GroupBy(tt => tt.Platform.Name)
let trantypes = g.GroupBy(tt => tt.TransactionType.Name)
select new {
User = g.Key,
Platforms = platforms,
TransactionTypes = trantypes
}).ToList()
.Select(u => new TransactionByDay {
User=u.User,
Platforms=u.Platforms.ToDictionary(tt => tt.Key, tt => tt.Count()),
TransactionTypes = u.TransactionTypes.ToDictionary(tt => tt.Key, tt => tt.Count())
});
//...
}发布于 2018-08-24 18:43:32
这样做的目的是尽量减少查询,并首先获取所需的尽可能多的数据。因此,不需要在每个事务中都包含Platform和TransactionType,您只需在Dictionary中查询一次并查找数据。此外,我们还可以并行进行处理,然后立即保存所有数据。
public static void AggregateTransactions()
{
using (var db = new ApplicationDbContext())
{
db.ChangeTracker.AutoDetectChangesEnabled = false;
//Get a list of users who have transactions
var transactionsByUser = db.Transactions
.GroupBy(x => x.User) //Not sure if EF Core supports this kind of grouping
.ToList();
var platforms = db.Platforms.ToDictionary(ks => ks.PlatformId);
var Transactiontypes = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);
var bag = new ConccurentBag<TransactionByDay>();
Parallel.ForEach(transactionsByUser, transaction =>
{
//Aggregate Platforms from all transactions for user
Dictionary<string, int> platforms = new Dictionary<string, int>(); //This can be converted to a ConccurentDictionary
//This can be converted to Parallel.ForEach
foreach (var item in _transactions.Select(x => platforms[x.PlatformId]).GroupBy(x => x.Name).ToList())
{
platforms.Add(item.Key, item.Count());
};
//Aggregate TransactionTypes from all transactions for user
Dictionary<string, int> transactionTypes = new Dictionary<string, int>(); //This can be converted to a ConccurentDictionary
//This can be converted to Parallel.ForEach
foreach (var item in _transactions.Select(x => Transactiontypes[c.TransactionTypeId]).GroupBy(x => x.Name).ToList())
{
transactionTypes.Add(item.Key, item.Count());
};
bag.Add(new TransactionByDay
{
User = transaction.Key,
Platforms = platforms, //The dictionary list is represented as json in table
TransactionTypes = transactionTypes //The dictionary list is represented as json in table
});
});
//Before calling this we may need to check the status of the Parallel ForEach, or just convert it back to regular foreach loop if you see no benefit.
db.AddRange(bag);
db.SaveChanges();
}
}变式2
public static void AggregateTransactions()
{
using (var db = new ApplicationDbContext())
{
db.ChangeTracker.AutoDetectChangesEnabled = false;
//Get a list of users who have transactions
var users = db.Transactions
.Select(x => x.User)
.Distinct().ToList();
var platforms = db.Platforms.ToDictionary(ks => ks.PlatformId);
var Transactiontypes = db.TransactionTypes.ToDictionary(ks => ks.TransactionTypeId);
var bag = new ConccurentBag<TransactionByDay>();
Parallel.ForEach(users, user =>
{
var _transactions = db.Transactions
.Where(x => x.User == user)
.ToList();
//Aggregate Platforms from all transactions for user
Dictionary<string, int> userPlatforms = new Dictionary<string, int>();
Dictionary<string, int> userTransactions = new Dictionary<string, int>();
foreach(var transaction in _transactions)
{
if(platforms.TryGetValue(transaction.PlatformId, out var platform))
{
if(userPlatforms.TryGetValue(platform.Name, out var tmp))
{
userPlatforms[platform.Name] = tmp + 1;
}
else
{
userPlatforms.Add(platform.Name, 1);
}
}
if(Transactiontypes.TryGetValue(transaction.TransactionTypeId, out var type))
{
if(userTransactions.TryGetValue(type.Name, out var tmp))
{
userTransactions[type.Name] = tmp + 1;
}
else
{
userTransactions.Add(type.Name, 1);
}
}
}
bag.Add(new TransactionByDay
{
User = user,
Platforms = userPlatforms, //The dictionary list is represented as json in table
TransactionTypes = userTransactions //The dictionary list is represented as json in table
});
});
db.AddRange(bag);
db.SaveChanges();
}
}https://stackoverflow.com/questions/52009050
复制相似问题