Hi<div><br></div><div>I'm trying to extend some filters in libavfilter in order to make them scalable. However, since I work on Windows I cannot actually build ffmpeg myself and have to work within the existing framework.</div>
<div><br></div><div>I would like to request an extension to libavfilter which would make this simpler, preferably some way to access a "user context".</div><div><br></div><div>For example, I have made an extension to the yadif filter which enables parallel execution, but it is a very ugly solution. I have to create x number of filter_line functions, each of which has a local "scalable_yadif_context", and I have to make sure that each instance of the yadif filter has a unique filter_line function.</div>
<div><br></div><div>// Need this definition to access and replace filter_line</div><div>// NOTE(review): ctx->priv is cast to this struct in make_scalable_yadif, so it presumably has to match the layout of libavfilter's private yadif context - verify against the ffmpeg build in use.</div><div><div>typedef struct {</div><div> int mode;</div><div> int parity;</div><div> int frame_pending;</div><div> int auto_enable;</div>
<div> AVFilterBufferRef *cur;</div><div> AVFilterBufferRef *next;</div><div> AVFilterBufferRef *prev;</div><div> AVFilterBufferRef *out;</div><div> void (*filter_line)(uint8_t *dst,</div><div> uint8_t *prev, uint8_t *cur, uint8_t *next,</div>
<div> int w, int prefs, int mrefs, int parity, int mode);</div><div> const AVPixFmtDescriptor *csp;</div><div>} YADIFContext;</div><div><br></div><div>// Per-instance queue state used by scalable_yadif_filter_line.</div><div>// calls: deferred invocations of the original filter_line.</div><div>// end_prefs: prefs value that marks the last call of a batch; the int maximum means "not determined yet".</div><div>struct scalable_yadif_context</div><div>{</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>std::vector<std::function<void()>> calls;</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>int end_prefs;</div><div><br></div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>scalable_yadif_context() : end_prefs(std::numeric_limits<int>::max()){}</div><div>};</div><div><br></div><div>// The original filter_line implementation: saved by make_scalable_yadif and invoked by the queued calls.</div><div>void (*org_yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode) = 0;</div>
<div><br></div><div>void scalable_yadif_filter_line(scalable_yadif_context& ctx, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode)</div><div>{</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>if(ctx.end_prefs == std::numeric_limits<int>::max())</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>ctx.end_prefs = -prefs;</div><div><br></div><div> // Just enqueue the call for later execution</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>ctx.calls.push_back([=] </div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>{</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>org_yadif_filter_line(dst, prev, cur, next, w, prefs, mrefs, parity, mode);</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>});</div><div> </div><div><span class="Apple-tab-span" style="white-space:pre"> </span>if(prefs == ctx.end_prefs) </div><div><span class="Apple-tab-span" style="white-space:pre"> </span>{<span class="Apple-tab-span" style="white-space:pre"> </span></div>
<div> // Last call. Execute all enqueued calls in parallel.</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>tbb::parallel_for(tbb::blocked_range<size_t>(0, ctx.calls.size()), [=](const tbb::blocked_range<size_t>& r)</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>{</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>for(auto n = r.begin(); n != r.end(); ++n)</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>ctx.calls[n]();</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>});</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>ctx.calls = std::vector<std::function<void()>>();</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>ctx.end_prefs = std::numeric_limits<int>::max();</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>}</div><div>}</div><div><br></div><div>// The code below is a UGLY hack which works for upto 18 concurrent instances.</div><div><br></div><div>#define RENAME(a) f ## a</div>
<div><br></div><div>// ff(x) defines a function named f followed by x (f0, f1, ...) with the filter_line signature.</div><div>// Each generated function owns one function-local static scalable_yadif_context and forwards</div><div>// every call, together with that context, to scalable_yadif_filter_line.</div><div>#define ff(x) \</div><div>void RENAME(x)(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode) \</div><div>{\</div><div><span class="Apple-tab-span" style="white-space:pre"> </span>static scalable_yadif_context ctx;\</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>scalable_yadif_filter_line(ctx, dst, prev, cur, next, w, prefs, mrefs, parity, mode);\</div><div>}</div><div><br></div><div>// Generate the 18 wrappers f0 .. f17.</div><div>ff(0); ff(1); ff(2); ff(3); ff(4); ff(5); ff(6); ff(7); ff(8); ff(9); ff(10); ff(11); ff(12); ff(13); ff(14); ff(15); ff(16); ff(17);</div>
<div><br></div><div>// Table of the generated wrappers f0 .. f17; make_scalable_yadif indexes it with a tag (fs[tag]).</div><div>void (*fs[])(uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int prefs, int mrefs, int parity, int mode) = </div><div><br></div><div>{f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16, f17};</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span></div>
<div>tbb::concurrent_bounded_queue<int> tags; // Pool of free indices into fs (one per scalable filter function)</div>
<div><br></div>
<div>// Fills the pool with one tag per entry of fs. Run once through boost::call_once in make_scalable_yadif.</div>
<div>void init()</div>
<div>{</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>// Derive the count from fs so the pool cannot get out of sync with the table.</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>const int count = static_cast<int>(sizeof(fs) / sizeof(fs[0]));</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>for(int n = 0; n < count; ++n)</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>tags.push(n);</div>
<div>}</div>
<div><br></div>
<div>// Replaces the filter_line of the yadif instance behind ctx with one of the queueing wrappers in fs.</div>
<div>// Returns the tag of the installed wrapper (hand it back with release_scalable_yadif when done),</div>
<div>// or -1 if no wrapper is free; in that case the instance keeps its own filter_line.</div>
<div>int make_scalable_yadif(AVFilterContext* ctx)</div>
<div>{</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>static boost::once_flag flag = BOOST_ONCE_INIT;</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>boost::call_once(&init, flag);</div>
<div><br></div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>YADIFContext* yadif = static_cast<YADIFContext*>(ctx->priv);</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>// NOTE(review): presumably benign only because every yadif instance holds the same filter_line pointer - confirm.</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>org_yadif_filter_line = yadif->filter_line; // Data race is not a problem.</div>
<div><br></div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>int tag;</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>if(!tags.try_pop(tag))</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>{</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>LOG(warning) << "Not enough scalable-yadif instances. Running non-scalable";</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>return -1;</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>}</div>
<div><br></div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>yadif->filter_line = fs[tag]; // each function has its own local context</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>return tag;</div>
<div>}</div>
<div><br></div>
<div>// Returns a tag obtained from make_scalable_yadif to the pool; -1 (no wrapper was installed) is ignored.</div>
<div>void release_scalable_yadif(int tag)</div>
<div>{</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>if(tag != -1)</div>
<div><span class="Apple-tab-span" style="white-space:pre"> </span>tags.push(tag);</div>
<div>}</div></div>