Shuvit game master repo. http://shuvit.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

tinytag.py 48KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. #!/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3. #
  4. # tinytag - an audio meta info reader
  5. # Copyright (c) 2014-2018 Tom Wallroth
  6. #
  7. # Sources on github:
  8. # http://github.com/devsnd/tinytag/
  9. # MIT License
  10. # Copyright (c) 2014-2018 Tom Wallroth
  11. # Permission is hereby granted, free of charge, to any person obtaining a copy
  12. # of this software and associated documentation files (the "Software"), to deal
  13. # in the Software without restriction, including without limitation the rights
  14. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. # copies of the Software, and to permit persons to whom the Software is
  16. # furnished to do so, subject to the following conditions:
  17. # The above copyright notice and this permission notice shall be included in all
  18. # copies or substantial portions of the Software.
  19. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  25. # SOFTWARE.
from __future__ import print_function

import codecs
import io
import os
import struct
import sys
from functools import reduce
from io import BytesIO

try:  # the ABC aliases were removed from `collections` in Python 3.10
    from collections.abc import MutableMapping
except ImportError:  # Python 2 only provides them in `collections`
    from collections import MutableMapping
# Module-wide switch: some of the parsers will print debug info (via
# `stderr`) when this is set to True.
DEBUG = False
  36. class TinyTagException(Exception):
  37. pass
  38. def _read(fh, nbytes): # helper function to check if we haven't reached EOF
  39. b = fh.read(nbytes)
  40. if len(b) < nbytes:
  41. raise TinyTagException('Unexpected end of file')
  42. return b
  43. def stderr(*args):
  44. sys.stderr.write('%s\n' % ' '.join(args))
  45. sys.stderr.flush()
  46. def _bytes_to_int_le(b):
  47. fmt = {1: '<B', 2: '<H', 4: '<I', 8: '<Q'}.get(len(b))
  48. return struct.unpack(fmt, b)[0] if fmt is not None else 0
  49. def _bytes_to_int(b):
  50. return reduce(lambda accu, elem: (accu << 8) + elem, b, 0)
  51. class TinyTag(object):
  52. def __init__(self, filehandler, filesize):
  53. self._filehandler = filehandler
  54. self.filesize = filesize
  55. self.album = None
  56. self.albumartist = None
  57. self.artist = None
  58. self.audio_offset = 0
  59. self.bitrate = 0.0 # must be float for later VBR calculations
  60. self.channels = None
  61. self.disc = None
  62. self.disc_total = None
  63. self.duration = 0
  64. self.genre = None
  65. self.samplerate = None
  66. self.title = None
  67. self.track = None
  68. self.track_total = None
  69. self.year = None
  70. self._load_image = False
  71. self._image_data = None
  72. def get_image(self):
  73. return self._image_data
  74. def has_all_tags(self):
  75. """check if all tags are already defined. Useful for ID3 tags
  76. since multiple kinds of tags can be in one audio file"""
  77. return all((self.track, self.track_total, self.title, self.artist,
  78. self.album, self.albumartist, self.year, self.genre))
  79. @classmethod
  80. def get(cls, filename, tags=True, duration=True, image=False):
  81. parser_class = None
  82. size = os.path.getsize(filename)
  83. if not size > 0:
  84. return TinyTag(None, 0)
  85. if cls == TinyTag: # if `get` is invoked on TinyTag, find parser by ext
  86. mapping = {
  87. ('.mp3',): ID3,
  88. ('.oga', '.ogg', '.opus'): Ogg,
  89. ('.wav',): Wave,
  90. ('.flac',): Flac,
  91. ('.wma',): Wma,
  92. ('.m4a', '.mp4'): MP4,
  93. }
  94. # choose which tag reader should be used by file extension
  95. for fileextension, tagclass in mapping.items():
  96. if filename.lower().endswith(fileextension):
  97. parser_class = tagclass
  98. break
  99. else: # otherwise use the class on which `get` was invoked
  100. parser_class = cls
  101. if parser_class is None:
  102. raise LookupError('No tag reader found to support filetype! ')
  103. with io.open(filename, 'rb') as af:
  104. tag = parser_class(af, size)
  105. tag.load(tags=tags, duration=duration, image=image)
  106. return tag
  107. def __str__(self):
  108. return str(dict(
  109. (k, v) for k, v in self.__dict__.items() if not k.startswith('_')
  110. ))
  111. def __repr__(self):
  112. return str(self)
  113. def load(self, tags, duration, image=False):
  114. if image:
  115. self._load_image = True
  116. if tags:
  117. self._parse_tag(self._filehandler)
  118. if duration:
  119. if tags: # rewind file if the tags were already parsed
  120. self._filehandler.seek(0)
  121. self._determine_duration(self._filehandler)
  122. def _set_field(self, fieldname, bytestring, transfunc=None):
  123. """convienience function to set fields of the tinytag by name.
  124. the payload (bytestring) can be changed using the transfunc"""
  125. if getattr(self, fieldname): # do not overwrite existing data
  126. return
  127. value = bytestring if transfunc is None else transfunc(bytestring)
  128. if DEBUG:
  129. stderr('Setting field "%s" to "%s"' % (fieldname, value))
  130. if fieldname == 'genre' and value.isdigit() and int(value) < len(ID3.ID3V1_GENRES):
  131. # funky: id3v1 genre hidden in a id3v2 field
  132. value = ID3.ID3V1_GENRES[int(value)]
  133. if fieldname in ("track", "disc"):
  134. if type(value).__name__ in ('str', 'unicode') and '/' in value:
  135. current, total = value.split('/')[:2]
  136. setattr(self, "%s_total" % fieldname, total)
  137. else:
  138. current = value
  139. setattr(self, fieldname, current)
  140. else:
  141. setattr(self, fieldname, value)
  142. def _determine_duration(self, fh):
  143. raise NotImplementedError()
  144. def _parse_tag(self, fh):
  145. raise NotImplementedError()
  146. def update(self, other):
  147. # update the values of this tag with the values from another tag
  148. for key in ['track', 'track_total', 'title', 'artist',
  149. 'album', 'albumartist', 'year', 'duration',
  150. 'genre', 'disc', 'disc_total']:
  151. if not getattr(self, key) and getattr(other, key):
  152. setattr(self, key, getattr(other, key))
  153. @staticmethod
  154. def _unpad(s):
  155. # strings in mp3 and asf *may* be terminated with a zero byte at the end
  156. return s[:s.index('\x00')] if '\x00' in s else s
  157. class MP4(TinyTag):
  158. # see: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html
  159. # and: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap2/qtff2.html
  160. class Parser:
  161. # https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW34
  162. ATOM_DECODER_BY_TYPE = {
  163. 0: lambda x: x, # 'reserved',
  164. 1: lambda x: codecs.decode(x, 'utf-8', 'replace'), # UTF-8
  165. 2: lambda x: codecs.decode(x, 'utf-16', 'replace'), # UTF-16
  166. 3: lambda x: codecs.decode(x, 's/jis', 'replace'), # S/JIS
  167. # 16: duration in millis
  168. 13: lambda x: x, # JPEG
  169. 14: lambda x: x, # PNG
  170. 21: lambda x: struct.unpack('>b', x)[0], # BE Signed int
  171. 22: lambda x: struct.unpack('>B', x)[0], # BE Unsigned int
  172. 23: lambda x: struct.unpack('>f', x)[0], # BE Float32
  173. 24: lambda x: struct.unpack('>d', x)[0], # BE Float64
  174. # 27: lambda x: x, # BMP
  175. # 28: lambda x: x, # QuickTime Metadata atom
  176. 65: lambda x: struct.unpack('b', x)[0], # 8-bit Signed int
  177. 66: lambda x: struct.unpack('>h', x)[0], # BE 16-bit Signed int
  178. 67: lambda x: struct.unpack('>i', x)[0], # BE 32-bit Signed int
  179. 74: lambda x: struct.unpack('>q', x)[0], # BE 64-bit Signed int
  180. 75: lambda x: struct.unpack('B', x)[0], # 8-bit Unsigned int
  181. 76: lambda x: struct.unpack('>H', x)[0], # BE 16-bit Unsigned int
  182. 77: lambda x: struct.unpack('>I', x)[0], # BE 32-bit Unsigned int
  183. 78: lambda x: struct.unpack('>Q', x)[0], # BE 64-bit Unsigned int
  184. }
  185. @classmethod
  186. def make_data_atom_parser(cls, fieldname):
  187. def parse_data_atom(data_atom):
  188. data_type = struct.unpack('>I', data_atom[:4])[0]
  189. conversion = cls.ATOM_DECODER_BY_TYPE.get(data_type)
  190. if conversion is None:
  191. stderr('Cannot convert data type: %s' % data_type)
  192. return {} # don't know how to convert data atom
  193. # skip header & null-bytes, convert rest
  194. return {fieldname: conversion(data_atom[8:])}
  195. return parse_data_atom
  196. @classmethod
  197. def make_number_parser(cls, fieldname1, fieldname2):
  198. def _(data_atom):
  199. number_data = data_atom[8:14]
  200. numbers = struct.unpack('>HHH', number_data)
  201. # for some reason the first number is always irrelevant.
  202. return {fieldname1: numbers[1], fieldname2: numbers[2]}
  203. return _
  204. @classmethod
  205. def parse_id3v1_genre(cls, data_atom):
  206. # dunno why the genre is offset by -1 but that's how mutagen does it
  207. idx = struct.unpack('>H', data_atom[8:])[0] - 1
  208. if idx < len(ID3.ID3V1_GENRES):
  209. return {'genre': ID3.ID3V1_GENRES[idx]}
  210. return {'genre': None}
  211. @classmethod
  212. def parse_audio_sample_entry(cls, data):
  213. # this atom also contains the esds atom:
  214. # https://ffmpeg.org/doxygen/0.6/mov_8c-source.html
  215. # http://xhelmboyx.tripod.com/formats/mp4-layout.txt
  216. datafh = BytesIO(data)
  217. datafh.seek(16, os.SEEK_CUR) # jump over version and flags
  218. channels = struct.unpack('>H', datafh.read(2))[0]
  219. datafh.seek(2, os.SEEK_CUR) # jump over bit_depth
  220. datafh.seek(2, os.SEEK_CUR) # jump over QT compr id & pkt size
  221. sr = struct.unpack('>I', datafh.read(4))[0]
  222. esds_atom_size = struct.unpack('>I', data[28:32])[0]
  223. esds_atom = BytesIO(data[36:36 + esds_atom_size])
  224. # http://sasperger.tistory.com/103
  225. esds_atom.seek(22, os.SEEK_CUR) # jump over most data...
  226. esds_atom.seek(4, os.SEEK_CUR) # jump over max bitrate
  227. avg_br = struct.unpack('>I', esds_atom.read(4))[0] / 1000 # kbit/s
  228. return {'channels': channels, 'samplerate': sr, 'bitrate': avg_br}
  229. @classmethod
  230. def parse_mvhd(cls, data):
  231. # http://stackoverflow.com/a/3639993/1191373
  232. walker = BytesIO(data)
  233. version = struct.unpack('b', walker.read(1))[0]
  234. walker.seek(3, os.SEEK_CUR) # jump over flags
  235. if version == 0: # uses 32 bit integers for timestamps
  236. walker.seek(8, os.SEEK_CUR) # jump over create & mod times
  237. time_scale = struct.unpack('>I', walker.read(4))[0]
  238. duration = struct.unpack('>I', walker.read(4))[0]
  239. else: # version == 1: # uses 64 bit integers for timestamps
  240. walker.seek(16, os.SEEK_CUR) # jump over create & mod times
  241. time_scale = struct.unpack('>I', walker.read(4))[0]
  242. duration = struct.unpack('>q', walker.read(8))[0]
  243. return {'duration': float(duration) / time_scale}
  244. @classmethod
  245. def debug_atom(cls, data):
  246. stderr(data) # use this function to inspect atoms in an atom tree
  247. return {}
  248. # The parser tree: Each key is an atom name which is traversed if existing.
  249. # Leaves of the parser tree are callables which receive the atom data.
  250. # callables return {fieldname: value} which is updates the TinyTag.
  251. META_DATA_TREE = {b'moov': {b'udta': {b'meta': {b'ilst': {
  252. # see: http://atomicparsley.sourceforge.net/mpeg-4files.html
  253. b'\xa9alb': {b'data': Parser.make_data_atom_parser('album')},
  254. b'\xa9ART': {b'data': Parser.make_data_atom_parser('artist')},
  255. b'aART': {b'data': Parser.make_data_atom_parser('albumartist')},
  256. # b'cpil': {b'data': Parser.make_data_atom_parser('compilation')},
  257. b'disk': {b'data': Parser.make_number_parser('disc', 'disc_total')},
  258. # b'\xa9wrt': {b'data': Parser.make_data_atom_parser('composer')},
  259. b'\xa9day': {b'data': Parser.make_data_atom_parser('year')},
  260. b'\xa9gen': {b'data': Parser.make_data_atom_parser('genre')},
  261. b'gnre': {b'data': Parser.parse_id3v1_genre},
  262. b'\xa9nam': {b'data': Parser.make_data_atom_parser('title')},
  263. b'trkn': {b'data': Parser.make_number_parser('track', 'track_total')},
  264. }}}}}
  265. # see: https://developer.apple.com/library/mac/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html
  266. AUDIO_DATA_TREE = {
  267. b'moov': {
  268. b'mvhd': Parser.parse_mvhd,
  269. b'trak': {b'mdia': {b"minf": {b"stbl": {b"stsd": {b'mp4a':
  270. Parser.parse_audio_sample_entry
  271. }}}}}
  272. }
  273. }
  274. IMAGE_DATA_TREE = {b'moov': {b'udta': {b'meta': {b'ilst': {
  275. b'covr': {b'data': Parser.make_data_atom_parser('_image_data')},
  276. }}}}}
  277. VERSIONED_ATOMS = set((b'meta', b'stsd')) # those have an extra 4 byte header
  278. FLAGGED_ATOMS = set((b'stsd',)) # these also have an extra 4 byte header
  279. def _determine_duration(self, fh):
  280. self._traverse_atoms(fh, path=self.AUDIO_DATA_TREE)
  281. def _parse_tag(self, fh):
  282. self._traverse_atoms(fh, path=self.META_DATA_TREE)
  283. if self._load_image: # A bit inefficient, we rewind the file
  284. self._filehandler.seek(0) # to parse it again for the image
  285. self._traverse_atoms(fh, path=self.IMAGE_DATA_TREE)
  286. def _traverse_atoms(self, fh, path, stop_pos=None, curr_path=None):
  287. header_size = 8
  288. atom_header = fh.read(header_size)
  289. while len(atom_header) == header_size:
  290. atom_size = struct.unpack('>I', atom_header[:4])[0] - header_size
  291. atom_type = atom_header[4:]
  292. if curr_path is None: # keep track how we traversed in the tree
  293. curr_path = [atom_type]
  294. if atom_size <= 0: # empty atom, jump to next one
  295. atom_header = fh.read(header_size)
  296. continue
  297. if DEBUG:
  298. stderr('%s pos: %d atom: %s len: %d' % (' ' * 4 * len(curr_path), fh.tell() - header_size, atom_type, atom_size + header_size))
  299. if atom_type in self.VERSIONED_ATOMS: # jump atom version for now
  300. fh.seek(4, os.SEEK_CUR)
  301. if atom_type in self.FLAGGED_ATOMS: # jump atom flags for now
  302. fh.seek(4, os.SEEK_CUR)
  303. sub_path = path.get(atom_type, None)
  304. # if the path leaf is a dict, traverse deeper into the tree:
  305. if issubclass(type(sub_path), MutableMapping):
  306. atom_end_pos = fh.tell() + atom_size
  307. self._traverse_atoms(fh, path=sub_path, stop_pos=atom_end_pos,
  308. curr_path=curr_path + [atom_type])
  309. # if the path-leaf is a callable, call it on the atom data
  310. elif callable(sub_path):
  311. for fieldname, value in sub_path(fh.read(atom_size)).items():
  312. if DEBUG:
  313. stderr(' ' * 4 * len(curr_path), 'FIELD: ', fieldname)
  314. if fieldname:
  315. self._set_field(fieldname, value)
  316. # if no action was specified using dict or callable, jump over atom
  317. else:
  318. fh.seek(atom_size, os.SEEK_CUR)
  319. # check if we have reached the end of this branch:
  320. if stop_pos and fh.tell() >= stop_pos:
  321. return # return to parent (next parent node in tree)
  322. atom_header = fh.read(header_size) # read next atom
  323. class ID3(TinyTag):
  324. FRAME_ID_TO_FIELD = { # Mapping from Frame ID to a field of the TinyTag
  325. 'TRCK': 'track', 'TRK': 'track',
  326. 'TYER': 'year', 'TYE': 'year',
  327. 'TALB': 'album', 'TAL': 'album',
  328. 'TPE1': 'artist', 'TP1': 'artist',
  329. 'TIT2': 'title', 'TT2': 'title',
  330. 'TCON': 'genre', 'TPOS': 'disc',
  331. 'TPE2': 'albumartist',
  332. }
  333. IMAGE_FRAME_IDS = set(['APIC', 'PIC'])
  334. PARSABLE_FRAME_IDS = set(FRAME_ID_TO_FIELD.keys()).union(IMAGE_FRAME_IDS)
  335. _MAX_ESTIMATION_SEC = 30
  336. _CBR_DETECTION_FRAME_COUNT = 5
  337. _USE_XING_HEADER = True # much faster, but can be deactivated for testing
  338. ID3V1_GENRES = [
  339. 'Blues', 'Classic Rock', 'Country', 'Dance', 'Disco',
  340. 'Funk', 'Grunge', 'Hip-Hop', 'Jazz', 'Metal', 'New Age', 'Oldies',
  341. 'Other', 'Pop', 'R&B', 'Rap', 'Reggae', 'Rock', 'Techno', 'Industrial',
  342. 'Alternative', 'Ska', 'Death Metal', 'Pranks', 'Soundtrack',
  343. 'Euro-Techno', 'Ambient', 'Trip-Hop', 'Vocal', 'Jazz+Funk', 'Fusion',
  344. 'Trance', 'Classical', 'Instrumental', 'Acid', 'House', 'Game',
  345. 'Sound Clip', 'Gospel', 'Noise', 'AlternRock', 'Bass', 'Soul', 'Punk',
  346. 'Space', 'Meditative', 'Instrumental Pop', 'Instrumental Rock',
  347. 'Ethnic', 'Gothic', 'Darkwave', 'Techno-Industrial', 'Electronic',
  348. 'Pop-Folk', 'Eurodance', 'Dream', 'Southern Rock', 'Comedy', 'Cult',
  349. 'Gangsta', 'Top 40', 'Christian Rap', 'Pop/Funk', 'Jungle',
  350. 'Native American', 'Cabaret', 'New Wave', 'Psychadelic', 'Rave',
  351. 'Showtunes', 'Trailer', 'Lo-Fi', 'Tribal', 'Acid Punk', 'Acid Jazz',
  352. 'Polka', 'Retro', 'Musical', 'Rock & Roll', 'Hard Rock',
  353. # Wimamp Extended Genres
  354. 'Folk', 'Folk-Rock', 'National Folk', 'Swing', 'Fast Fusion', 'Bebob',
  355. 'Latin', 'Revival', 'Celtic', 'Bluegrass', 'Avantgarde', 'Gothic Rock',
  356. 'Progressive Rock', 'Psychedelic Rock', 'Symphonic Rock', 'Slow Rock',
  357. 'Big Band', 'Chorus', 'Easy Listening', 'Acoustic', 'Humour', 'Speech',
  358. 'Chanson', 'Opera', 'Chamber Music', 'Sonata', 'Symphony', 'Booty Bass',
  359. 'Primus', 'Porn Groove', 'Satire', 'Slow Jam', 'Club', 'Tango', 'Samba',
  360. 'Folklore', 'Ballad', 'Power Ballad', 'Rhythmic Soul', 'Freestyle',
  361. 'Duet', 'Punk Rock', 'Drum Solo', 'A capella', 'Euro-House',
  362. 'Dance Hall', 'Goa', 'Drum & Bass',
  363. # according to https://de.wikipedia.org/wiki/Liste_der_ID3v1-Genres:
  364. 'Club-House', 'Hardcore Techno', 'Terror', 'Indie', 'BritPop',
  365. '', # don't use ethnic slur ("Negerpunk", WTF!)
  366. 'Polsk Punk', 'Beat', 'Christian Gangsta Rap', 'Heavy Metal',
  367. 'Black Metal', 'Contemporary Christian', 'Christian Rock',
  368. # WinAmp 1.91
  369. 'Merengue', 'Salsa', 'Thrash Metal', 'Anime', 'Jpop', 'Synthpop',
  370. # WinAmp 5.6
  371. 'Abstract', 'Art Rock', 'Baroque', 'Bhangra', 'Big Beat', 'Breakbeat',
  372. 'Chillout', 'Downtempo', 'Dub', 'EBM', 'Eclectic', 'Electro',
  373. 'Electroclash', 'Emo', 'Experimental', 'Garage', 'Illbient',
  374. 'Industro-Goth', 'Jam Band', 'Krautrock', 'Leftfield', 'Lounge',
  375. 'Math Rock', 'New Romantic', 'Nu-Breakz', 'Post-Punk', 'Post-Rock',
  376. 'Psytrance', 'Shoegaze', 'Space Rock', 'Trop Rock', 'World Music',
  377. 'Neoclassical', 'Audiobook', 'Audio Theatre', 'Neue Deutsche Welle',
  378. 'Podcast', 'Indie Rock', 'G-Funk', 'Dubstep', 'Garage Rock', 'Psybient',
  379. ]
  380. def __init__(self, filehandler, filesize):
  381. TinyTag.__init__(self, filehandler, filesize)
  382. # save position after the ID3 tag for duration mesurement speedup
  383. self._bytepos_after_id3v2 = 0
  384. @classmethod
  385. def set_estimation_precision(cls, estimation_in_seconds):
  386. cls._MAX_ESTIMATION_SEC = estimation_in_seconds
  387. # see this page for the magic values used in mp3:
  388. # http://www.mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm
  389. samplerates = [
  390. [11025, 12000, 8000], # MPEG 2.5
  391. [], # reserved
  392. [22050, 24000, 16000], # MPEG 2
  393. [44100, 48000, 32000], # MPEG 1
  394. ]
  395. v1l1 = [0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0]
  396. v1l2 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0]
  397. v1l3 = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0]
  398. v2l1 = [0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0]
  399. v2l2 = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0]
  400. v2l3 = v2l2
  401. bitrate_by_version_by_layer = [
  402. [None, v2l3, v2l2, v2l1], # MPEG Version 2.5 # note that the layers go
  403. None, # reserved # from 3 to 1 by design.
  404. [None, v2l3, v2l2, v2l1], # MPEG Version 2 # the first layer id is
  405. [None, v1l3, v1l2, v1l1], # MPEG Version 1 # reserved
  406. ]
  407. samples_per_frame = 1152 # the default frame size for mp3
  408. channels_per_channel_mode = [
  409. 2, # 00 Stereo
  410. 2, # 01 Joint stereo (Stereo)
  411. 2, # 10 Dual channel (2 mono channels)
  412. 1, # 11 Single channel (Mono)
  413. ]
  414. def _parse_xing_header(self, fh):
  415. # see: http://www.mp3-tech.org/programmer/sources/vbrheadersdk.zip
  416. fh.seek(4, os.SEEK_CUR) # read over Xing header
  417. header_flags = struct.unpack('>i', fh.read(4))[0]
  418. frames = byte_count = toc = vbr_scale = None
  419. if header_flags & 1: # FRAMES FLAG
  420. frames = struct.unpack('>i', fh.read(4))[0]
  421. if header_flags & 2: # BYTES FLAG
  422. byte_count = struct.unpack('>i', fh.read(4))[0]
  423. if header_flags & 4: # TOC FLAG
  424. toc = [struct.unpack('>i', fh.read(4))[0] for _ in range(100)]
  425. if header_flags & 8: # VBR SCALE FLAG
  426. vbr_scale = struct.unpack('>i', fh.read(4))[0]
  427. return frames, byte_count, toc, vbr_scale
  428. def _determine_duration(self, fh):
  429. max_estimation_frames = (ID3._MAX_ESTIMATION_SEC * 44100) // ID3.samples_per_frame
  430. frame_size_accu = 0
  431. header_bytes = 4
  432. frames = 0 # count frames for determining mp3 duration
  433. bitrate_accu = 0 # add up bitrates to find average bitrate to detect
  434. last_bitrates = [] # CBR mp3s (multiple frames with same bitrates)
  435. # seek to first position after id3 tag (speedup for large header)
  436. fh.seek(self._bytepos_after_id3v2)
  437. while True:
  438. # reading through garbage until 11 '1' sync-bits are found
  439. b = fh.peek(4)
  440. if len(b) < 4:
  441. break # EOF
  442. sync, conf, bitrate_freq, rest = struct.unpack('BBBB', b[0:4])
  443. br_id = (bitrate_freq >> 4) & 0x0F # biterate id
  444. sr_id = (bitrate_freq >> 2) & 0x03 # sample rate id
  445. padding = 1 if bitrate_freq & 0x02 > 0 else 0
  446. mpeg_id = (conf >> 3) & 0x03
  447. layer_id = (conf >> 1) & 0x03
  448. channel_mode = (rest >> 6) & 0x03
  449. self.channels = self.channels_per_channel_mode[channel_mode]
  450. # check for eleven 1s, validate bitrate and sample rate
  451. if not b[:2] > b'\xFF\xE0' or br_id > 14 or br_id == 0 or sr_id == 3:
  452. idx = b.find(b'\xFF', 1) # invalid frame, find next sync header
  453. if idx == -1:
  454. idx = len(b) # not found: jump over the current peek buffer
  455. fh.seek(max(idx, 1), os.SEEK_CUR)
  456. continue
  457. try:
  458. self.samplerate = ID3.samplerates[mpeg_id][sr_id]
  459. frame_bitrate = ID3.bitrate_by_version_by_layer[mpeg_id][layer_id][br_id]
  460. except (IndexError, TypeError):
  461. raise TinyTagException('mp3 parsing failed')
  462. # There might be a xing header in the first frame that contains
  463. # all the info we need, otherwise parse multiple frames to find the
  464. # accurate average bitrate
  465. if frames == 0 and ID3._USE_XING_HEADER:
  466. xing_header_offset = b.find(b'Xing')
  467. if xing_header_offset != -1:
  468. fh.seek(xing_header_offset, os.SEEK_CUR)
  469. xframes, byte_count, toc, vbr_scale = self._parse_xing_header(fh)
  470. if xframes and xframes != 0 and byte_count:
  471. self.duration = xframes * ID3.samples_per_frame / float(self.samplerate)
  472. self.bitrate = byte_count * 8 / self.duration / 1000
  473. self.audio_offset = fh.tell()
  474. return
  475. continue
  476. frames += 1 # it's most probably an mp3 frame
  477. bitrate_accu += frame_bitrate
  478. if frames == 1:
  479. self.audio_offset = fh.tell()
  480. if frames <= ID3._CBR_DETECTION_FRAME_COUNT:
  481. last_bitrates.append(frame_bitrate)
  482. fh.seek(4, os.SEEK_CUR) # jump over peeked bytes
  483. frame_length = (144000 * frame_bitrate) // self.samplerate + padding
  484. frame_size_accu += frame_length
  485. # if bitrate does not change over time its probably CBR
  486. is_cbr = (frames == ID3._CBR_DETECTION_FRAME_COUNT and
  487. len(set(last_bitrates)) == 1)
  488. if frames == max_estimation_frames or is_cbr:
  489. # try to estimate duration
  490. fh.seek(-128, 2) # jump to last byte (leaving out id3v1 tag)
  491. audio_stream_size = fh.tell() - self.audio_offset
  492. est_frame_count = audio_stream_size / (frame_size_accu / float(frames))
  493. samples = est_frame_count * ID3.samples_per_frame
  494. self.duration = samples / float(self.samplerate)
  495. self.bitrate = bitrate_accu / frames
  496. return
  497. if frame_length > 1: # jump over current frame body
  498. fh.seek(frame_length - header_bytes, os.SEEK_CUR)
  499. if self.samplerate:
  500. self.duration = frames * ID3.samples_per_frame / float(self.samplerate)
  501. def _parse_tag(self, fh):
  502. self._parse_id3v2(fh)
  503. if not self.has_all_tags() and self.filesize > 128:
  504. fh.seek(-128, os.SEEK_END) # try parsing id3v1 in last 128 bytes
  505. self._parse_id3v1(fh)
  506. def _parse_id3v2(self, fh):
  507. # for info on the specs, see: http://id3.org/Developer%20Information
  508. header = struct.unpack('3sBBB4B', _read(fh, 10))
  509. tag = codecs.decode(header[0], 'ISO-8859-1')
  510. # check if there is an ID3v2 tag at the beginning of the file
  511. if tag == 'ID3':
  512. major, rev = header[1:3]
  513. if DEBUG:
  514. stderr('Found id3 v2.%s' % major)
  515. # unsync = (header[3] & 0x80) > 0
  516. extended = (header[3] & 0x40) > 0
  517. # experimental = (header[3] & 0x20) > 0
  518. # footer = (header[3] & 0x10) > 0
  519. size = self._calc_size(header[4:8], 7)
  520. self._bytepos_after_id3v2 = size
  521. parsed_size = 0
  522. if extended: # just read over the extended header.
  523. size_bytes = struct.unpack('4B', _read(fh, 6)[0:4])
  524. extd_size = self._calc_size(size_bytes, 7)
  525. fh.seek(extd_size - 6, os.SEEK_CUR) # jump over extended_header
  526. while parsed_size < size:
  527. frame_size = self._parse_frame(fh, id3version=major)
  528. if frame_size == 0:
  529. break
  530. parsed_size += frame_size
  531. def _parse_id3v1(self, fh):
  532. if fh.read(3) == b'TAG': # check if this is an ID3 v1 tag
  533. def asciidecode(x):
  534. return self._unpad(codecs.decode(x, 'latin1'))
  535. fields = fh.read(30 + 30 + 30 + 4 + 30 + 1)
  536. self._set_field('title', fields[:30], transfunc=asciidecode)
  537. self._set_field('artist', fields[30:60], transfunc=asciidecode)
  538. self._set_field('album', fields[60:90], transfunc=asciidecode)
  539. self._set_field('year', fields[90:94], transfunc=asciidecode)
  540. comment = fields[94:124]
  541. if b'\x00\x00' < comment[-2:] < b'\x01\x00':
  542. self._set_field('track', str(ord(comment[-1:])))
  543. genre_id = ord(fields[124:125])
  544. if genre_id < len(ID3.ID3V1_GENRES):
  545. self.genre = ID3.ID3V1_GENRES[genre_id]
  546. def _parse_frame(self, fh, id3version=False):
  547. # ID3v2.2 especially ugly. see: http://id3.org/id3v2-00
  548. frame_header_size = 6 if id3version == 2 else 10
  549. frame_size_bytes = 3 if id3version == 2 else 4
  550. binformat = '3s3B' if id3version == 2 else '4s4B2B'
  551. bits_per_byte = 7 if id3version == 4 else 8 # only id3v2.4 is synchsafe
  552. frame_header_data = fh.read(frame_header_size)
  553. if len(frame_header_data) == 0:
  554. return 0
  555. frame = struct.unpack(binformat, frame_header_data)
  556. frame_id = self._decode_string(frame[0])
  557. frame_size = self._calc_size(frame[1:1+frame_size_bytes], bits_per_byte)
  558. if DEBUG:
  559. stderr('Found Frame %s at %d-%d' % (frame_id, fh.tell(), fh.tell() + frame_size))
  560. if frame_size > 0:
  561. # flags = frame[1+frame_size_bytes:] # dont care about flags.
  562. if not frame_id in ID3.PARSABLE_FRAME_IDS: # jump over unparsable frames
  563. fh.seek(frame_size, os.SEEK_CUR)
  564. return frame_size
  565. content = fh.read(frame_size)
  566. fieldname = ID3.FRAME_ID_TO_FIELD.get(frame_id)
  567. if fieldname:
  568. self._set_field(fieldname, content, self._decode_string)
  569. elif frame_id in self.IMAGE_FRAME_IDS and self._load_image:
  570. # See section 4.14: http://id3.org/id3v2.4.0-frames
  571. if frame_id == 'PIC': # ID3 v2.2:
  572. desc_end_pos = content.index(b'\x00', 1) + 1
  573. else: # ID3 v2.3+
  574. mimetype_end_pos = content.index(b'\x00', 1) + 1
  575. desc_start_pos = mimetype_end_pos + 1 # jump over picture type
  576. desc_end_pos = content.index(b'\x00', desc_start_pos) + 1
  577. if content[desc_end_pos:desc_end_pos+1] == b'\x00':
  578. desc_end_pos += 1 # the description ends with 1 or 2 null bytes
  579. self._image_data = content[desc_end_pos:]
  580. return frame_size
  581. return 0
  582. def _decode_string(self, b):
  583. try: # it's not my fault, this is the spec.
  584. first_byte = b[:1]
  585. if first_byte == b'\x00': # ISO-8859-1
  586. return self._unpad(codecs.decode(b[1:], 'ISO-8859-1'))
  587. elif first_byte == b'\x01': # UTF-16 with BOM
  588. # read byte order mark to determine endianess
  589. encoding = 'UTF-16be' if b[1:3] == b'\xfe\xff' else 'UTF-16le'
  590. # strip the bom and optional null bytes
  591. bytestr = b[3:-1] if len(b) % 2 == 0 else b[3:]
  592. return self._unpad(codecs.decode(bytestr, encoding))
  593. elif first_byte == b'\x02': # UTF-16LE
  594. # strip optional null byte, if byte count uneven
  595. bytestr = b[1:-1] if len(b) % 2 == 0 else b[1:]
  596. return self._unpad(codecs.decode(bytestr, 'UTF-16le'))
  597. elif first_byte == b'\x03': # UTF-8
  598. return codecs.decode(b[1:], 'UTF-8')
  599. return self._unpad(codecs.decode(b, 'ISO-8859-1')) # wild guess
  600. except UnicodeDecodeError:
  601. raise TinyTagException('Error decoding ID3 Tag!')
  602. def _calc_size(self, bytestr, bits_per_byte):
  603. # length of some mp3 header fields is described by 7 or 8-bit-bytes
  604. return reduce(lambda accu, elem: (accu << bits_per_byte) + elem, bytestr, 0)
  605. class Ogg(TinyTag):
  606. def __init__(self, filehandler, filesize):
  607. TinyTag.__init__(self, filehandler, filesize)
  608. self._tags_parsed = False
  609. self._max_samplenum = 0 # maximum sample position ever read
  610. def _determine_duration(self, fh):
  611. MAX_PAGE_SIZE = 65536 # https://xiph.org/ogg/doc/libogg/ogg_page.html
  612. if not self._tags_parsed:
  613. self._parse_tag(fh) # determine sample rate
  614. fh.seek(0) # and rewind to start
  615. if self.filesize > MAX_PAGE_SIZE:
  616. fh.seek(-MAX_PAGE_SIZE, 2) # go to last possible page position
  617. while True:
  618. b = fh.peek(4)
  619. if len(b) == 0:
  620. return # EOF
  621. if b[:4] == b'OggS': # look for an ogg header
  622. for packet in self._parse_pages(fh):
  623. pass # parse all remaining pages
  624. self.duration = self._max_samplenum / float(self.samplerate)
  625. else:
  626. idx = b.find(b'OggS') # try to find header in peeked data
  627. seekpos = idx if idx != -1 else len(b) - 3
  628. fh.seek(max(seekpos, 1), os.SEEK_CUR)
  629. def _parse_tag(self, fh):
  630. page_start_pos = fh.tell() # set audio_offest later if its audio data
  631. for packet in self._parse_pages(fh):
  632. walker = BytesIO(packet)
  633. if packet[0:7] == b"\x01vorbis":
  634. (channels, self.samplerate, max_bitrate, bitrate,
  635. min_bitrate) = struct.unpack("<B4i", packet[11:28])
  636. if not self.audio_offset:
  637. self.bitrate = bitrate / 1024
  638. self.audio_offset = page_start_pos
  639. elif packet[0:7] == b"\x03vorbis":
  640. walker.seek(7, os.SEEK_CUR) # jump over header name
  641. self._parse_vorbis_comment(walker)
  642. elif packet[0:8] == b'OpusHead': # parse opus header
  643. # https://www.videolan.org/developers/vlc/modules/codec/opus_header.c
  644. # https://mf4.xiph.org/jenkins/view/opus/job/opusfile-unix/ws/doc/html/structOpusHead.html
  645. walker.seek(8, os.SEEK_CUR) # jump over header name
  646. (version, ch, _, sr, _, _) = struct.unpack("<BBHIHB", walker.read(11))
  647. if (version & 0xF0) == 0: # only major version 0 supported
  648. self.channels = ch
  649. self.samplerate = sr
  650. elif packet[0:8] == b'OpusTags': # parse opus metadata:
  651. walker.seek(8, os.SEEK_CUR) # jump over header name
  652. self._parse_vorbis_comment(walker)
  653. else:
  654. break
  655. page_start_pos = fh.tell()
  656. def _parse_vorbis_comment(self, fh):
  657. # for the spec, see: http://xiph.org/vorbis/doc/v-comment.html
  658. # discnumber tag based on: https://en.wikipedia.org/wiki/Vorbis_comment
  659. comment_type_to_attr_mapping = {
  660. 'album': 'album',
  661. 'albumartist': 'albumartist',
  662. 'title': 'title',
  663. 'artist': 'artist',
  664. 'date': 'year',
  665. 'tracknumber': 'track',
  666. 'discnumber': 'disc',
  667. 'genre': 'genre'
  668. }
  669. vendor_length = struct.unpack('I', fh.read(4))[0]
  670. fh.seek(vendor_length, os.SEEK_CUR) # jump over vendor
  671. elements = struct.unpack('I', fh.read(4))[0]
  672. for i in range(elements):
  673. length = struct.unpack('I', fh.read(4))[0]
  674. keyvalpair = codecs.decode(fh.read(length), 'UTF-8')
  675. if '=' in keyvalpair:
  676. key, value = keyvalpair.split('=', 1)
  677. fieldname = comment_type_to_attr_mapping.get(key.lower())
  678. if fieldname:
  679. self._set_field(fieldname, value)
  680. def _parse_pages(self, fh):
  681. # for the spec, see: https://wiki.xiph.org/Ogg
  682. previous_page = b'' # contains data from previous (continuing) pages
  683. header_data = fh.read(27) # read ogg page header
  684. while len(header_data) != 0:
  685. header = struct.unpack('<4sBBqIIiB', header_data)
  686. oggs, version, flags, pos, serial, pageseq, crc, segments = header
  687. self._max_samplenum = max(self._max_samplenum, pos)
  688. if oggs != b'OggS' or version != 0:
  689. raise TinyTagException('Not a valid ogg file!')
  690. segsizes = struct.unpack('B'*segments, fh.read(segments))
  691. total = 0
  692. for segsize in segsizes: # read all segments
  693. total += segsize
  694. if total < 255: # less than 255 bytes means end of page
  695. yield previous_page + fh.read(total)
  696. previous_page = b''
  697. total = 0
  698. if total != 0:
  699. if total % 255 == 0:
  700. previous_page += fh.read(total)
  701. else:
  702. yield previous_page + fh.read(total)
  703. previous_page = b''
  704. header_data = fh.read(27)
  705. class Wave(TinyTag):
  706. def __init__(self, filehandler, filesize):
  707. TinyTag.__init__(self, filehandler, filesize)
  708. self._duration_parsed = False
  709. def _determine_duration(self, fh):
  710. # see: https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
  711. # and: https://en.wikipedia.org/wiki/WAV
  712. riff, size, fformat = struct.unpack('4sI4s', fh.read(12))
  713. if riff != b'RIFF' or fformat != b'WAVE':
  714. raise TinyTagException('not a wave file!')
  715. channels, bitdepth = 2, 16 # assume CD quality
  716. chunk_header = fh.read(8)
  717. while len(chunk_header) == 8:
  718. subchunkid, subchunksize = struct.unpack('4sI', chunk_header)
  719. if subchunkid == b'fmt ':
  720. _, channels, self.samplerate = struct.unpack('HHI', fh.read(8))
  721. _, _, bitdepth = struct.unpack('<IHH', fh.read(8))
  722. self.bitrate = self.samplerate * channels * bitdepth / 1024
  723. elif subchunkid == b'data':
  724. self.duration = float(subchunksize)/channels/self.samplerate/(bitdepth/8)
  725. self.audio_offest = fh.tell() - 8 # rewind to data header
  726. fh.seek(subchunksize, 1)
  727. elif subchunkid == b'id3 ' or subchunkid == b'ID3 ':
  728. id3 = ID3(fh, 0)
  729. id3._parse_id3v2(fh)
  730. self.update(id3)
  731. else: # some other chunk, just skip the data
  732. fh.seek(subchunksize, 1)
  733. chunk_header = fh.read(8)
  734. self._duration_parsed = True
  735. def _parse_tag(self, fh):
  736. if not self._duration_parsed:
  737. self._determine_duration(fh) # parse whole file to determine tags:(
  738. class Flac(TinyTag):
  739. METADATA_STREAMINFO = 0
  740. METADATA_VORBIS_COMMENT = 4
  741. def load(self, tags, duration, image=False):
  742. if self._filehandler.read(4) != b'fLaC':
  743. raise TinyTagException('Invalid flac header')
  744. self._determine_duration(self._filehandler, skip_tags=not tags)
  745. def _determine_duration(self, fh, skip_tags=False):
  746. # for spec, see https://xiph.org/flac/ogg_mapping.html
  747. header_data = fh.read(4)
  748. while len(header_data):
  749. meta_header = struct.unpack('B3B', header_data)
  750. block_type = meta_header[0] & 0x7f
  751. is_last_block = meta_header[0] & 0x80
  752. size = _bytes_to_int(meta_header[1:4])
  753. # http://xiph.org/flac/format.html#metadata_block_streaminfo
  754. if block_type == Flac.METADATA_STREAMINFO:
  755. stream_info_header = fh.read(size)
  756. if len(stream_info_header) < 34: # invalid streaminfo
  757. break
  758. header = struct.unpack('HH3s3s8B16s', stream_info_header)
  759. # From the ciph documentation:
  760. # py | <bits>
  761. # ----------------------------------------------
  762. # H | <16> The minimum block size (in samples)
  763. # H | <16> The maximum block size (in samples)
  764. # 3s | <24> The minimum frame size (in bytes)
  765. # 3s | <24> The maximum frame size (in bytes)
  766. # 8B | <20> Sample rate in Hz.
  767. # | <3> (number of channels)-1.
  768. # | <5> (bits per sample)-1.
  769. # | <36> Total samples in stream.
  770. # 16s| <128> MD5 signature
  771. #
  772. min_blk, max_blk, min_frm, max_frm = header[0:4]
  773. min_frm = _bytes_to_int(struct.unpack('3B', min_frm))
  774. max_frm = _bytes_to_int(struct.unpack('3B', max_frm))
  775. # channels-
  776. # `. bits total samples
  777. # |----- samplerate -----| |-||----| |---------~ ~----|
  778. # 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000
  779. # #---4---# #---5---# #---6---# #---7---# #--8-~ ~-12-#
  780. self.samplerate = _bytes_to_int(header[4:7]) >> 4
  781. self.channels = ((header[6] >> 1) & 0x07) + 1
  782. # bit_depth = ((header[6] & 1) << 4) + ((header[7] & 0xF0) >> 4)
  783. # bit_depth = (bit_depth + 1)
  784. total_sample_bytes = [(header[7] & 0x0F)] + list(header[8:12])
  785. total_samples = _bytes_to_int(total_sample_bytes)
  786. self.duration = float(total_samples) / self.samplerate
  787. if self.duration > 0:
  788. self.bitrate = self.filesize / self.duration * 8 / 1024
  789. elif block_type == Flac.METADATA_VORBIS_COMMENT and not skip_tags:
  790. oggtag = Ogg(fh, 0)
  791. oggtag._parse_vorbis_comment(fh)
  792. self.update(oggtag)
  793. elif block_type >= 127:
  794. return # invalid block type
  795. else:
  796. fh.seek(size, 1) # seek over this block
  797. if is_last_block:
  798. break
  799. else:
  800. header_data = fh.read(4)
class Wma(TinyTag):
    """Tag and duration reader for ASF containers (.wma files)."""

    # 16-byte object / stream-type GUIDs in their on-disk byte order; they are
    # compared directly against the raw bytes read from the file.
    ASF_CONTENT_DESCRIPTION_OBJECT = b'3&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel'
    ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT = b'@\xa4\xd0\xd2\x07\xe3\xd2\x11\x97\xf0\x00\xa0\xc9^\xa8P'
    STREAM_BITRATE_PROPERTIES_OBJECT = b'\xceu\xf8{\x8dF\xd1\x11\x8d\x82\x00`\x97\xc9\xa2\xb2'
    ASF_FILE_PROPERTY_OBJECT = b'\xa1\xdc\xab\x8cG\xa9\xcf\x11\x8e\xe4\x00\xc0\x0c Se'
    ASF_STREAM_PROPERTIES_OBJECT = b'\x91\x07\xdc\xb7\xb7\xa9\xcf\x11\x8e\xe6\x00\xc0\x0c Se'
    STREAM_TYPE_ASF_AUDIO_MEDIA = b'@\x9ei\xf8M[\xcf\x11\xa8\xfd\x00\x80_\\D+'
    # see:
    # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx
    # and (japanese, but none the less helpful)
    # http://uguisu.skr.jp/Windows/format_asf.html

    def __init__(self, filehandler, filesize):
        TinyTag.__init__(self, filehandler, filesize)
        self.__tag_parsed = False

    def _determine_duration(self, fh):
        """Ensure the header was parsed; _parse_tag also sets the duration."""
        if not self.__tag_parsed:
            self._parse_tag(fh)

    def read_blocks(self, fh, blocks):
        """Read consecutive fixed-size fields from fh.

        :param blocks: iterable of (fieldname, byte_count, cast_int) tuples.
        :return: dict mapping each fieldname to the raw bytes read, or to the
            result of _bytes_to_int_le on them when cast_int is true. Entries
            sharing a fieldname overwrite each other.
        """
        # blocks are a list(tuple('fieldname', byte_count, cast_int), ...)
        decoded = {}
        for block in blocks:
            val = fh.read(block[1])
            if block[2]:
                val = _bytes_to_int_le(val)
            decoded[block[0]] = val
        return decoded

    def __bytes_to_guid(self, obj_id_bytes):
        """Format raw GUID bytes as a dash-separated hex string.

        NOTE(review): not called anywhere in the visible part of the file;
        presumably a debugging helper.
        """
        return '-'.join([
            hex(_bytes_to_int_le(obj_id_bytes[:-12]))[2:].zfill(6),
            hex(_bytes_to_int_le(obj_id_bytes[-12:-10]))[2:].zfill(4),
            hex(_bytes_to_int_le(obj_id_bytes[-10:-8]))[2:].zfill(4),
            hex(_bytes_to_int(obj_id_bytes[-8:-6]))[2:].zfill(4),
            hex(_bytes_to_int(obj_id_bytes[-6:]))[2:].zfill(12),
        ])

    def __decode_string(self, bytestring):
        """Decode bytes as UTF-16 and pass the result through self._unpad."""
        return self._unpad(codecs.decode(bytestring, 'utf-16'))

    def __decode_ext_desc(self, value_type, value):
        """Decode an ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT value.

        Type 0 is decoded as a string, type 1 is returned as raw bytes and
        types 2 to 5 are converted with _bytes_to_int_le. Any other type
        falls through and yields None.
        """
        if value_type == 0:  # Unicode string
            return self.__decode_string(value)
        elif value_type == 1:  # BYTE array
            return value
        elif 1 < value_type < 6:  # DWORD / QWORD / WORD
            return _bytes_to_int_le(value)

    def _parse_tag(self, fh):
        """Parse the ASF header objects of fh, filling in tags and properties.

        Returns silently when fh does not start with the expected header GUID
        or the two marker bytes that follow the header fields. Otherwise
        objects are read one after another until an object with size 0 or a
        size larger than self.filesize is met.
        """
        # set first so that _determine_duration never triggers a second parse
        self.__tag_parsed = True
        guid = fh.read(16)  # 128 bit GUID
        if guid != b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel':
            return  # not a valid ASF container! see: http://www.garykessler.net/library/file_sigs.html
        # size and obj_count are read to advance the file position; their
        # values are not used below
        size = struct.unpack('Q', fh.read(8))[0]
        obj_count = struct.unpack('I', fh.read(4))[0]
        if fh.read(2) != b'\x01\x02':
            # http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc521913958
            return  # not a valid asf header!
        while True:
            # every object starts with a 16 byte id and an 8 byte size
            object_id = fh.read(16)
            object_size = _bytes_to_int_le(fh.read(8))
            if object_size == 0 or object_size > self.filesize:
                break  # invalid object, stop parsing.
            if object_id == Wma.ASF_CONTENT_DESCRIPTION_OBJECT:
                # five 2-byte lengths come first, then the five values
                len_blocks = self.read_blocks(fh, [
                    ('title_length', 2, True),
                    ('author_length', 2, True),
                    ('copyright_length', 2, True),
                    ('description_length', 2, True),
                    ('rating_length', 2, True),
                ])
                # values with an empty field name are read only to advance
                # the file position; the loop below ignores them
                data_blocks = self.read_blocks(fh, [
                    ('title', len_blocks['title_length'], False),
                    ('artist', len_blocks['author_length'], False),
                    ('', len_blocks['copyright_length'], True),
                    ('', len_blocks['description_length'], True),
                    ('', len_blocks['rating_length'], True),
                ])
                for field_name, bytestring in data_blocks.items():
                    if field_name:
                        self._set_field(field_name, bytestring, self.__decode_string)
            elif object_id == Wma.ASF_EXTENDED_CONTENT_DESCRIPTION_OBJECT:
                # descriptor names that are stored, mapped to the tag field
                mapping = {
                    'WM/TrackNumber': 'track',
                    'WM/PartOfSet': 'disc',
                    'WM/Year': 'year',
                    'WM/AlbumArtist': 'albumartist',
                    'WM/Genre': 'genre',
                    'WM/AlbumTitle': 'album',
                }
                # see: http://web.archive.org/web/20131203084402/http://msdn.microsoft.com/en-us/library/bb643323.aspx#_Toc509555195
                descriptor_count = _bytes_to_int_le(fh.read(2))
                for _ in range(descriptor_count):
                    # each descriptor: name length, name, value type,
                    # value length, value
                    name_len = _bytes_to_int_le(fh.read(2))
                    name = self.__decode_string(fh.read(name_len))
                    value_type = _bytes_to_int_le(fh.read(2))
                    value_len = _bytes_to_int_le(fh.read(2))
                    value = fh.read(value_len)
                    field_name = mapping.get(name)
                    if field_name:
                        field_value = self.__decode_ext_desc(value_type, value)
                        self._set_field(field_name, field_value)
            elif object_id == Wma.ASF_FILE_PROPERTY_OBJECT:
                blocks = self.read_blocks(fh, [
                    ('file_id', 16, False),
                    ('file_size', 8, False),
                    ('creation_date', 8, True),
                    ('data_packets_count', 8, True),
                    ('play_duration', 8, True),
                    ('send_duration', 8, True),
                    ('preroll', 8, True),
                    ('flags', 4, False),
                    ('minimum_data_packet_size', 4, True),
                    ('maximum_data_packet_size', 4, True),
                    ('maximum_bitrate', 4, False),
                ])
                # NOTE(review): presumably play_duration is in 100 ns units,
                # making this seconds - confirm against the ASF spec
                self.duration = blocks.get('play_duration') / float(10000000)
            elif object_id == Wma.ASF_STREAM_PROPERTIES_OBJECT:
                blocks = self.read_blocks(fh, [
                    ('stream_type', 16, False),
                    ('error_correction_type', 16, False),
                    ('time_offset', 8, True),
                    ('type_specific_data_length', 4, True),
                    ('error_correction_data_length', 4, True),
                    ('flags', 2, True),
                    ('reserved', 4, False)
                ])
                # bytes of the type specific data consumed so far
                already_read = 0
                if blocks['stream_type'] == Wma.STREAM_TYPE_ASF_AUDIO_MEDIA:
                    stream_info = self.read_blocks(fh, [
                        ('codec_id_format_tag', 2, True),
                        ('number_of_channels', 2, True),
                        ('samples_per_second', 4, True),
                        ('avg_bytes_per_second', 4, True),
                        ('block_alignment', 2, True),
                        ('bits_per_sample', 2, True),
                    ])
                    self.samplerate = stream_info['samples_per_second']
                    self.bitrate = stream_info['avg_bytes_per_second'] * 8 / float(1000)
                    already_read = 16  # size of the six fields read above
                # skip the rest of the type specific data and then the error
                # correction data
                fh.seek(blocks['type_specific_data_length'] - already_read, os.SEEK_CUR)
                fh.seek(blocks['error_correction_data_length'], os.SEEK_CUR)
            else:
                # 24 = the 16 byte id plus the 8 byte size already consumed
                fh.seek(object_size - 24, os.SEEK_CUR)  # read over unknown object ids