我最近读过一篇有趣的博客文章,作者是菲利普·兹考什:它解释了如何通过跟踪堆栈中以前分配的内存来避免不必要的分配/释放。
编写替代智能指针(如std::unique_ptr和std::shared_ptr)的插入代码非常容易,这些指针将利用内存回收器。
当创建智能指针时,在分配之前,检查回收器中是否有未使用的分配内存块就足够了:如果有,使用它,否则分配新内存。
在销毁智能指针时,只需调用对象的析构函数,然后将现在未定义/无法使用的内存块推到内存回收器堆栈上,这样就可以使用相同类型的其他智能指针了。
namespace ssvu
{
namespace Internal
{
template<typename T, typename TBase> class Recycler
{
private:
std::vector<TBase*> ptrs;
std::allocator<T> alloc;
inline T* pop() noexcept
{
SSVU_ASSERT(!ptrs.empty());
auto result(ptrs.back());
ptrs.pop_back();
return reinterpret_cast<T*>(result);
}
public:
inline Recycler() = default;
// Deallocate all memory on destruction
inline ~Recycler() noexcept { for(auto p : ptrs) alloc.deallocate(reinterpret_cast<T*>(p), 1); }
// Recycle: call the allocated object's destructor, but do not deallocate memory
// Instead, store the pointer to the allocated memory block for reuse
inline void recycle(TBase* mPtr) noexcept(noexcept(alloc.destroy(mPtr)))
{
SSVU_ASSERT(mPtr != nullptr);
alloc.destroy(mPtr);
ptrs.emplace_back(mPtr);
}
// Create: either reuse allocated unused memory, or allocate a new memory block
template<typename... TArgs> inline T* create(TArgs&&... mArgs)
{
auto result(ptrs.empty() ? alloc.allocate(1) : pop());
alloc.construct(result, std::forward<TArgs>(mArgs)...);
return result;
}
};
template<typename T, typename TBase> inline Recycler<T, TBase>& getRecycler() noexcept
{
static Recycler<T, TBase> result; return result;
}
}
// Uptr<T, TDeleter> is a typedef for std::unique_ptr<T, TDeleter>
template<typename T, typename TBase> using UptrRecPoly = Uptr<T, void(*)(TBase*)>;
template<typename T> using UptrRec = UptrRecPoly<T, T>;
template<typename TBase> using VecUptrRec = std::vector<UptrRec<TBase>>;
template<typename T, typename TBase, typename... TArgs> inline UptrRecPoly<T, TBase> makeUptrRecPoly(TArgs&&... mArgs)
{
return {Internal::getRecycler<T, TBase>().create(std::forward<TArgs>(mArgs)...), [](TBase* mPtr)
{
Internal::getRecycler<T, TBase>().recycle(mPtr);
}};
}
template<typename T, typename... TArgs> inline UptrRec<T> makeUptrRec(TArgs&&... mArgs) { return makeUptrRecPoly<T, T>(std::forward<TArgs>(mArgs)...); }
}每个static Recycler类型对都有一个<T, TBase>实例。
示例:
struct Base { ~Base(); };
struct Der1 : Base { };
struct Der2 : Base { };
VecUptrRec<Base> vec;
// Recycler<Der1, Base> is empty - allocate 2 memory blocks
vec.emplace_back(std::move(makeUptrRecPoly<Der1, Base>());
vec.emplace_back(std::move(makeUptrRecPoly<Der1, Base>());
// Recycler<Der2, Base> is empty - allocate 2 memory blocks
vec.emplace_back(std::move(makeUptrRecPoly<Der2, Base>());
vec.emplace_back(std::move(makeUptrRecPoly<Der2, Base>());
vec.clear();
// Now both Recycler<Der1, Base> and Recycler<Der2, Base> contain 2 allocated (but unused) memory blocks
// Recycler<Der1, Base> won't allocate any additional memory
vec.emplace_back(std::move(makeUptrRecPoly<Der1, Base>());
vec.emplace_back(std::move(makeUptrRecPoly<Der1, Base>());
// Recycler<Der2, Base> won't allocate any additional memory
vec.emplace_back(std::move(makeUptrRecPoly<Der2, Base>());
vec.emplace_back(std::move(makeUptrRecPoly<Der2, Base>());// Benchmark results
// ---
// Time needed to continuously fill/clear vectors of 10000000 polymorphic objects.
[Benchmark #1 - <Vector<Uptr>>] 4896 ms
[Benchmark #1 - <Vector<UptrRecPoly>>] 3506 ms
[Benchmark #1 - <Vector<Uptr>>] 5631 ms
[Benchmark #1 - <Vector<UptrRecPoly>>] 3436 ms
[Benchmark #1 - <Vector<Uptr>>] 5126 ms
[Benchmark #1 - <Vector<UptrRecPoly>>] 3277 ms
// Time needed to create 3500000 polymorphic objects (half of them of type Der1 : Base, half of them of type Der2 : Base)
// The objects are stored in a std::vector of Uptr<Base> or UptrRec<Base>.
// For 20 times, the objects are iterated upon, and every third object is marked as "dead". After that, dead objects are removed with the erase-remove idiom.
// After dead objects removal, the vector is cleared and filled again with 3500000 objects.
[Benchmark #1 - <MemoryManager>] 12956 ms
[Benchmark #1 - <RecMemoryManager>] 7548 ms
[Benchmark #1 - <MemoryManager>] 12974 ms
[Benchmark #1 - <RecMemoryManager>] 8349 ms
[Benchmark #1 - <MemoryManager>] 13582 ms
[Benchmark #1 - <RecMemoryManager>] 8568 ms基准不断地创建/销毁智能指针。性能几乎是普通智能指针的2倍。
发布于 2014-06-04 19:53:09
我的主要抱怨是在这样低级别的接口中使用std::vector。我认为这完全是浪费资源。
想知道这与基于向量的版本相比是如何执行的。
我基本上把std::vector<TBase>换成了一个TBase*,并构建了一个链。它假设TBase至少有指针的大小(但我相信我们可以添加静态断言来实现这一点)。
namespace ssvu
{
namespace Internal
{
template<typename T, typename TBase> class Recycler
{
struct Chain
{
Chain* next;
};
private:
Chain* chain;
std::allocator<T> alloc;
T* pop() noexcept
{
SSVU_ASSERT(chain != nullptr);
TBase* result = reinterpret_cast<TBase*>(chain);
chain = chain->next;
return result;
}
public:
Recycler()
: chain(nullptr)
{}
// Deallocate all memory on destruction
~Recycler() noexcept
{
Chain* next;
for(;chain != nullptr; chain = next)
{
next = chain->next;
alloc.deallocate(reinterpret_cast<TBase*>(chain), 1);
}
}
// Recycle: call the allocated object's destructor, but do not deallocate memory
// Instead, store the pointer to the allocated memory block for reuse
void recycle(TBase* mPtr) noexcept(noexcept(alloc.destroy(mPtr)))
{
SSVU_ASSERT(mPtr != nullptr);
alloc.destroy(mPtr);
Chain* newHead = reinterpret_cast<Chain*>(mptr);
newHead->next = chain;
chain = newHead;
}
// Create: either reuse allocated unused memory, or allocate a new memory block
template<typename... TArgs> T* create(TArgs&&... mArgs)
{
auto result(chain == nullptr ? alloc.allocate(1) : pop());
alloc.construct(result, std::forward<TArgs>(mArgs)...);
return result;
}
};
template<typename T, typename TBase> Recycler<T, TBase>& getRecycler() noexcept
{
static Recycler<T, TBase> result; return result;
}
}
// Uptr<T, TDeleter> is a typedef for std::unique_ptr<T, TDeleter>
template<typename T, typename TBase> using UptrRecPoly = Uptr<T, void(*)(TBase*)>;
template<typename T> using UptrRec = UptrRecPoly<T, T>;
template<typename TBase> using VecUptrRec = std::vector<UptrRec<TBase>>;
template<typename T, typename TBase, typename... TArgs> inline UptrRecPoly<T, TBase> makeUptrRecPoly(TArgs&&... mArgs)
{
return {Internal::getRecycler<T, TBase>().create(std::forward<TArgs>(mArgs)...), [](TBase* mPtr)
{
Internal::getRecycler<T, TBase>().recycle(mPtr);
}};
}
template<typename T, typename... TArgs> inline UptrRec<T> makeUptrRec(TArgs&&... mArgs) { return makeUptrRecPoly<T, T>(std::forward<TArgs>(mArgs)...); }
}(但这完全是我个人的喜好,所以没那么重要)。
template<typename T, typename TBase> using UptrRecPoly = Uptr<T, void(*)(TBase*)>;
template<typename T> using UptrRec = UptrRecPoly<T, T>;
template<typename TBase> using VecUptrRec = std::vector<UptrRec<TBase>>;我相信这会让你读起来容易得多。
关键字inline在代码内联中绝对不起作用。别用它来做那个。它只在一个定义规则中起作用,所以如果一个函数是在包含到多个编译单元的头文件中定义的,那么就需要它。
https://codereview.stackexchange.com/questions/52433
复制相似问题