OSSIA
Open Scenario System for Interactive Application
libav.hpp
1 #pragma once
2 #include <ossia/detail/config.hpp>
3 
4 #if __has_include(<libavcodec/avcodec.h>) && \
5  __has_include(<libavformat/avformat.h>) && \
6  __has_include(<libavdevice/avdevice.h>) && \
7  __has_include(<libavutil/frame.h>) && \
8  __has_include(<libswresample/swresample.h>) && \
9  __has_include(<libswscale/swscale.h>)
10 
11 #define OSSIA_HAS_LIBAV 1
12 #include <ossia/detail/flicks.hpp>
13 
14 extern "C" {
15 #include <libavcodec/avcodec.h>
16 #include <libavformat/avformat.h>
17 #include <libavutil/channel_layout.h>
18 #include <libavutil/version.h>
19 #include <libswresample/swresample.h>
20 }
21 
22 namespace ossia
23 {
// Custom seek flag understood by seek_to_flick(). It lives in a high bit (bit 26)
// so that it is unlikely to ever collide with one of libav's AVSEEK_FLAG_* values.
static constexpr int OSSIA_LIBAV_SEEK_ROUGH = 1 << 26;
26 inline bool seek_to_flick(
27  AVFormatContext* format, AVCodecContext* codec, AVStream* stream, int64_t flicks,
28  int flags = 0)
29 {
30  constexpr auto flicks_tb = AVRational{1, ossia::flicks_per_second<int64_t>};
31  constexpr auto av_tb = AVRational{1, AV_TIME_BASE};
32 
33  const auto dts = av_rescale_q_rnd(flicks, flicks_tb, av_tb, AVRounding::AV_ROUND_DOWN);
34 
35  avio_flush(format->pb);
36  avformat_flush(format);
37  if(codec)
38  avcodec_flush_buffers(codec);
39 
40  if(flags & OSSIA_LIBAV_SEEK_ROUGH)
41  {
42  // Seems to work better for seeking for thumbnails
43  flags &= ~OSSIA_LIBAV_SEEK_ROUGH;
44  if(av_seek_frame(format, -1, dts, flags) < 0)
45  return false;
46  }
47  else
48  {
49  // Seems to work better for seeking for video & audio
50  if(avformat_seek_file(format, -1, INT64_MIN, dts, INT64_MAX, flags) < 0)
51  return false;
52  }
53 
54  avio_flush(format->pb);
55  avformat_flush(format);
56  if(codec)
57  avcodec_flush_buffers(codec);
58 
59  return true;
60 }
61 
62 static inline int avstream_get_audio_channels(AVStream& stream) noexcept
63 {
64 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
65  return stream.codecpar->ch_layout.nb_channels;
66 #else
67  return stream.codecpar->channels;
68 #endif
69 }
70 
71 struct libav_handle
72 {
73  AVFormatContext* format{};
74  AVStream* stream{};
75  AVCodecContext* codec{};
76  SwrContext* resample{};
77 
78  libav_handle() = default;
79  libav_handle(const libav_handle& other) = delete;
80  libav_handle(libav_handle&& other)
81  {
82  format = other.format;
83  other.format = nullptr;
84  stream = other.stream;
85  other.stream = nullptr;
86  codec = other.codec;
87  other.codec = nullptr;
88  resample = other.resample;
89  other.resample = nullptr;
90  }
91  libav_handle& operator=(const libav_handle& other) = delete;
92  libav_handle& operator=(libav_handle&& other)
93  {
94  format = other.format;
95  other.format = nullptr;
96  stream = other.stream;
97  other.stream = nullptr;
98  codec = other.codec;
99  other.codec = nullptr;
100  resample = other.resample;
101  other.resample = nullptr;
102  return *this;
103  }
104  ~libav_handle()
105  {
106  if(format)
107  cleanup();
108  }
109 
110  void cleanup()
111  {
112  stream = nullptr;
113  if(resample)
114  swr_free(&resample);
115  if(codec)
116  {
117  avcodec_free_context(&codec);
118  }
119  if(format)
120  {
121  avformat_close_input(&format);
122  avformat_free_context(format);
123  format = nullptr;
124  }
125  }
126 
127  void open(const std::string& path, int stream_index, int target_rate) noexcept
128  {
129  if(avformat_open_input(&format, path.c_str(), nullptr, nullptr) != 0)
130  {
131  cleanup();
132  return;
133  }
134  if(avformat_find_stream_info(format, nullptr) < 0)
135  {
136  cleanup();
137  return;
138  }
139  if(stream_index < 0 || stream_index >= format->nb_streams)
140  {
141  cleanup();
142  return;
143  }
144  if(format->streams[stream_index]->codecpar->codec_type != AVMEDIA_TYPE_AUDIO)
145  {
146  cleanup();
147  return;
148  }
149 
150  stream = format->streams[stream_index];
151 
152  auto cdc = avcodec_find_decoder(stream->codecpar->codec_id);
153  if(!cdc)
154  {
155  cleanup();
156  return;
157  }
158 
159  codec = avcodec_alloc_context3(cdc);
160  if(avcodec_parameters_to_context(codec, stream->codecpar) < 0)
161  {
162  cleanup();
163  return;
164  }
165 
166  if(avcodec_open2(codec, cdc, nullptr) < 0)
167  {
168  cleanup();
169  return;
170  }
171 
172  AVSampleFormat out_sample_fmt{AV_SAMPLE_FMT_FLT};
173  AVSampleFormat in_sample_fmt{(AVSampleFormat)stream->codecpar->format};
174  int in_sample_rate = stream->codecpar->sample_rate;
175  int out_sample_rate = target_rate == 0 ? in_sample_rate : target_rate;
176 
177 #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 24, 100)
178  AVChannelLayout out_layout{stream->codecpar->ch_layout};
179  AVChannelLayout in_layout{stream->codecpar->ch_layout};
180 
181  swr_alloc_set_opts2(
182  &resample, &out_layout, out_sample_fmt, out_sample_rate, &in_layout,
183  in_sample_fmt, in_sample_rate, 0, nullptr);
184 #else
185  int64_t out_layout = stream->codecpar->channel_layout;
186  int64_t in_layout = stream->codecpar->channel_layout;
187 
188  resample = swr_alloc_set_opts(
189  resample, out_layout, out_sample_fmt, out_sample_rate, in_layout, in_sample_fmt,
190  in_sample_rate, 0, nullptr);
191 #endif
192  if(resample)
193  swr_init(resample);
194  }
195 
196  int rate() const noexcept { return stream->codecpar->sample_rate; }
197  int channels() const noexcept { return avstream_get_audio_channels(*stream); }
198 
199  int64_t totalPCMFrameCount() const noexcept
200  {
201  if(stream->duration > 0)
202  {
203  return stream->duration;
204  }
205  else if(format->duration > 0)
206  {
207  double seconds = format->duration / double(AV_TIME_BASE);
208  double frames = seconds * stream->codecpar->sample_rate;
209  return frames;
210  }
211  else if(stream->nb_frames > 0)
212  {
213  return av_rescale_q(
214  stream->nb_frames, stream->r_frame_rate,
215  AVRational{1, stream->codecpar->sample_rate});
216  }
217  else
218  {
219  return 0;
220  }
221  }
222 
223  operator bool() const noexcept { return bool(format); }
224 
225  void fetch(int64_t frame, int samples_to_write, auto func)
226  {
227  // First seek
228  ossia::seek_to_flick(
229  format, codec, stream,
230  ossia::flicks_per_second<double> * frame / stream->codecpar->sample_rate,
231  AVSEEK_FLAG_ANY);
232 
233  const std::size_t channels = this->channels();
234  std::vector<float> tmp;
235  if(channels == 0)
236  return;
237 
238  int processed = 0;
239  while(processed < samples_to_write)
240  {
241  // Need to fetch more data
242  {
243  auto packet = av_packet_alloc();
244  int ret{};
245  {
246  ret = av_read_frame(format, packet);
247 
248  while(ret >= 0 && ret != AVERROR(EOF) && packet->stream_index != stream->index)
249  {
250  av_packet_unref(packet);
251  ret = av_read_frame(format, packet);
252  }
253  if(ret == AVERROR(EOF))
254  {
255  break;
256  }
257  }
258  if(ret < 0)
259  {
260  return;
261  }
262 
263  ret = avcodec_send_packet(codec, packet);
264  if(ret == 0)
265  {
266  auto avframe = av_frame_alloc();
267  ret = avcodec_receive_frame(codec, avframe);
268  if(ret == 0)
269  {
270  const int av_frame_start = avframe->best_effort_timestamp;
271  const int samples = avframe->nb_samples;
272 
273  // It's possible that we get a frame that is just after what we have asked, thus
274  // frame - av_frame_start can sometimes be negative
275  const int offset = (frame < av_frame_start) ? 0 : (frame - av_frame_start);
276  if(offset >= samples)
277  {
278  // ffmpeg didn't even manage to seek to the correct frame, we have to read another
279  av_frame_free(&avframe);
280  av_packet_unref(packet);
281  av_packet_free(&packet);
282  continue;
283  }
284 
285  // The actual samples we can read here:
286  //samples = samples - (frame - av_frame_start);
287 
288  tmp.resize(samples * channels);
289  float* out_ptr = tmp.data();
290 
291  // Note: this function is only called with swr_convert's rate not changing
292  int read_samples = swr_convert(
293  resample, (uint8_t**)&out_ptr, samples,
294  (const uint8_t**)avframe->extended_data, samples);
295 
296  auto end = tmp.data() + tmp.size();
297 
298  read_samples -= offset;
299  if(read_samples <= 0)
300  {
301  av_frame_free(&avframe);
302  av_packet_unref(packet);
303  av_packet_free(&packet);
304  return;
305  }
306 
307  out_ptr += offset * channels;
308 
309  for(int i = 0; i < read_samples; i++)
310  {
311  func(out_ptr, end);
312 
313  processed++;
314  if(processed == samples_to_write)
315  {
316  av_frame_free(&avframe);
317  av_packet_unref(packet);
318  av_packet_free(&packet);
319  return;
320  }
321 
322  out_ptr += channels;
323  }
324 
325  // We read an entire packet: set "frame" to the beginning of next packet
326  // so that we read it in full
327  frame = avframe->best_effort_timestamp + avframe->nb_samples;
328  }
329  av_frame_free(&avframe);
330  }
331  av_packet_unref(packet);
332  av_packet_free(&packet);
333  }
334  }
335  }
336 };
337 
338 }
339 #endif
Definition: git_info.h:7