musicfix

music file renamer and tagger
git clone git://git.2f30.org/musicfix.git
Log | Files | Refs | README | LICENSE

musicfix (15477B)


      1 #!/usr/bin/env ruby
      2 # encoding: utf-8
      3 
      4 require 'rubygems'
      5 require 'fileutils'
      6 require 'open-uri'
      7 require 'stringex'
      8 require 'taglib'
      9 require 'yaml'
     10 
     11 # Headers
     12 Ver = '0.2.0'
     13 Homepage = 'http://git.2f30.org/musicfix/'
     14 Headers = {'User-Agent' => "musicfix/#{Ver} +#{Homepage}"}
     15 
     16 # Concatenate artist list using '&'
     17 # Convert "Sound, The (2)" to "The Sound"
     18 # Convert "Unknown (21), The" to "The Unknown"
     19 def mkartist al
     20     nl = al.collect {|a| a['name']}
     21     nl.each {|n| n.gsub! /\s\(\d+\)$/, ''}
     22     nl.each {|n| n.gsub! /(.*), The$/, 'The \1'}
     23     nl.each {|n| n.gsub! /\s\(\d+\)$/, ''}
     24     nl.join ' & '
     25 end
     26 
     27 # Convert "3" to ("", "03")
     28 # Convert "A" to ("A", "1")
     29 # Convert "A3" to ("A", "3")
     30 # Convert "2.3" to ("2", "03")
     31 # Convert "2.03" to ("2", "03")
     32 # Convert "CD2-3" to ("2", "03")
     33 def mkdiscnum pos
     34     np = pos.gsub /[-.]/, '#'
     35     d, n = np.split '#'
     36     if not n
     37         n = d
     38         d = ''
     39     else
     40         d = d.gsub /\D/, ''
     41     end
     42     parts = n.match /([A-Z])([0-9]*)/
     43     if parts then
     44         d = parts[1]
     45         n = parts[2] != "" && parts[2] || "1"
     46     else
     47         n = n.rjust(2, '0')
     48     end
     49     return d, n
     50 end
     51 
     52 # Add wordings for symbols
     53 syms = {"≠" => "not equals",
     54         "Χ" => "x",
     55         "★" => "blackstar",
     56         "•" => ""}
     57 Stringex::Localization.store_translations :en, :transliterations, syms
     58 
     59 # Convert to lowercase ASCII without punctuation
     60 # Convert "Jean-Michel Jarre" to "jean_michel_jarre"
     61 # Convert "Aaah...!" to "aaah"
     62 # Convert "Ruh / Spirit" to "ruh_spirit"
     63 def mkname n
     64     # These may be in track titles like "(7'' Version)"
     65     n = n.gsub '12"', '12 inch'
     66     n = n.gsub "12''", "12 inch"
     67     n = n.gsub "12'", "12 inch"
     68     n = n.gsub '10"', '10 inch'
     69     n = n.gsub "10''", "10 inch"
     70     n = n.gsub "10'", "10 inch"
     71     n = n.gsub '7"', '7 inch'
     72     n = n.gsub "7''", "7 inch"
     73     n = n.gsub "7'", "7 inch"
     74     n = n.gsub " & ", " and "
     75     n = n.gsub '.', ' '
     76     n = n.gsub '/', ' '
     77     # Transliterate
     78     n.to_url.gsub '-', '_'
     79 end
     80 
     81 # Get cover artwork
     82 def getimages rel
     83     return nil unless rel['images']
     84     imgs = []
     85     rel['images'].each do |img|
     86         imgs << img['uri']
     87     end
     88     imgs
     89 end
     90 
     91 # Formats we care about and their abbreviations
     92 # http://www.discogs.com/search/#more_facets_format_exact
     93 # Ignore "Vinyl" in favor of "LP"/"EP"/'7"'/'10"'/'12"' descriptions
     94 # Ignore "File" in favor of "MP3"/"WAV"/"FLAC" descriptions
     95 # Avoid too generic descriptions such as "Album"
     96 # Avoid too specific descriptions such as "Green", "Gatefold"
     97 @ft = {
     98     'CD' => 'CD',
     99     'CDr' => 'CDr',
    100     'LP' => 'LP',
    101     'EP' => 'EP',
    102     'Cassette' => 'Cass',
    103     '12"' => '12inch',
    104     '10"' => '10inch',
    105     '7"' => '7inch',
    106     'Mini-Album' => 'Mini',
    107     'Maxi-Single' => 'Maxi',
    108     'Picture Disc' => 'Pic',
    109     'Flexi-disc' => 'Flexi',
    110     'Promo' => 'Promo',
    111     'Reissue' => 'RE',
    112     'Remastered' => 'RM',
    113     'Remaster' => 'RM',
    114     'Repress' => 'RP',
    115     'Mispress' => 'MP',
    116     'Test Pressing' => 'TP',
    117     'Enhanced' => 'Enh',
    118     'Digipak ' => 'Dig',
    119     'Box Set' => 'Box',
    120     'Limited Edition' => 'Ltd',
    121     'Club Edition' => 'Club',
    122     'Compilation' => 'Comp',
    123     'Sampler' => 'Smplr',
    124     'Numbered' => 'Num',
    125     'Unofficial Release' => 'Unofficial',
    126     'Single Sided' => 'S/Sided',
    127     'MP3' => 'MP3',
    128     'AAC' => 'AAC',
    129     'FLAC' => 'FLAC',
    130     'WAV' => 'WAV',
    131 }
    132 
    133 # Make a sane format string also using format description
    134 def mkformat format
    135     f = []
    136     formats = []
    137     if format['name'] then
    138         formats << format['name']
    139     end
    140     if format['descriptions'] then
    141         formats += format['descriptions']
    142     end
    143     formats.each do |d|
    144         f << d if @ft.keys.include? d
    145     end
    146     f.join ' '
    147 end
    148 
    149 # Shorten certain common words that appear in format strings
    150 def mkshort n
    151     # Note that prefix substitution is broken
    152     ftre = /(#{@ft.keys.join('|')})/
    153     n.gsub(ftre, @ft)
    154 end
    155 
    156 # Return single item if array is full of duplicates
    157 def flatten_if_one ary
    158     if ary.uniq.length == 1 then
    159         ary.first
    160     else
    161         ary.uniq
    162     end
    163 end
    164 
    165 # Remove all tags and images for supported formats
    166 def rmtags fname
    167     TagLib::MPEG::File.open(fname) do |file|
    168         tag = file.id3v2_tag
    169         tag and tag.frame_list.each do |frame|
    170             tag.remove_frame frame
    171         end
    172         tag = file.id3v1_tag
    173         if tag then
    174             tag.artist = nil
    175             tag.album = nil
    176             tag.title = nil
    177             tag.track = 0
    178             tag.year = 0
    179             tag.genre = nil
    180             tag.comment = nil
    181         end
    182         file.save
    183     end
    184     TagLib::MP4::File.open(fname) do |file|
    185         tag = file.tag
    186         tag and tag.item_list_map.clear
    187         file.save
    188     end
    189     TagLib::Ogg::Vorbis::File.open(fname) do |file|
    190         tag = file.tag
    191         tag and tag.field_list_map.each do |field|
    192             tag.remove_field field[0]
    193         end
    194         file.save
    195     end
    196     TagLib::FLAC::File.open(fname) do |file|
    197         tag = file.xiph_comment
    198         tag and tag.field_list_map.each do |field|
    199             tag.remove_field field[0]
    200         end
    201         tag = file.id3v2_tag
    202         tag and tag.frame_list.each do |frame|
    203             tag.remove_frame frame
    204         end
    205         tag = file.id3v1_tag
    206         if tag then
    207             tag.artist = nil
    208             tag.album = nil
    209             tag.title = nil
    210             tag.track = 0
    211             tag.year = 0
    212             tag.genre = nil
    213             tag.comment = nil
    214         end
    215         file.remove_pictures
    216         file.save
    217     end
    218     TagLib::RIFF::AIFF::File.open(fname) do |file|
    219         tag = file.tag
    220         tag and tag.frame_list.each do |frame|
    221             tag.remove_frame frame
    222         end
    223         file.save
    224     end
    225     TagLib::RIFF::WAV::File.open(fname) do |file|
    226         tag = file.tag
    227         tag and tag.frame_list.each do |frame|
    228             tag.remove_frame frame
    229         end
    230         file.save
    231     end
    232 end
    233 
    234 # Parse command line
    235 usage = ''
    236 usage << "Usage: musicfix [fake] relid\n"
    237 usage << "       musicfix [fake] dump relid [relfile]\n"
    238 usage << "       musicfix [fake] load [relfile]\n"
    239 usage << "       musicfix [fake] tags [relfile]\n"
    240 fake = ARGV[0] == 'fake'
    241 ARGV.delete 'fake'
    242 cmd = ARGV[0] || (puts usage; exit)
    243 case cmd
    244 when 'load' then
    245     relfile = ARGV[1] || nil
    246 when 'dump' then
    247     relid = ARGV[1] || (puts usage; exit)
    248     relfile = ARGV[2] || 'release.yaml'
    249 when 'tags' then
    250     relfile = ARGV[1] || 'release.yaml'
    251 else
    252     relid = ARGV[0]
    253 end
    254 
    255 # Default configuration
    256 cfg = {}
    257 cfg['mdir'] = '~/music'
    258 cfg['track'] = '"#{mdir}/#{fba}-#{my}-#{fb}-#{fv}/#{fd}#{n}-#{fa}-#{ft}.#{x}"'
    259 cfg['image'] = '"#{mdir}/#{fba}-#{my}-#{fb}-#{fv}/#{zz}-#{fba}-#{fb}_cover#{i}.jpg"'
    260 cfg['rdata'] = '"#{mdir}/#{fba}-#{my}-#{fb}-#{fv}/#{zz}-#{fba}-#{fb}_release.yaml"'
    261 #cfg['after'] = '"mpc update #{fba}-#{my}-#{fb}-#{fv}"'
    262 cfg['nimg'] = 1
    263 
    264 # User configuration overrides
    265 cfgpath = File.expand_path('~/.musicfixrc')
    266 if File.exists? cfgpath
    267     new = YAML.load File.open(cfgpath, 'r')
    268     cfg.merge! new
    269 end
    270 
    271 # Authentication option
    272 urlopts = ''
    273 if cfg['token'] then
    274     urlopts = "?token=#{cfg['token']}"
    275 end
    276 
    277 # Expand music directory
    278 cfg['mdir'] = File.expand_path cfg['mdir']
    279 
    280 # Print configuration
    281 puts '# Configuration'
    282 puts cfg.to_yaml
    283 
    284 # Early file checks
    285 if cmd == 'dump' or cmd == 'tags' then
    286     if File.exists? relfile then
    287         STDERR.puts "Release file #{relfile} exists!"
    288         exit
    289     end
    290 end
    291 
    292 unless cmd == 'dump' then
    293     # Supported formats
    294     fmtre = /mp3|ogg|m4a|mpc|flac|wv|wav|aiff/i
    295     # Construct file list
    296     fl = Dir['*'].select {|f| File.extname(f).match fmtre}.sort
    297     if fl.empty? then
    298         STDERR.puts 'No music files found!'
    299         exit 1
    300     end
    301     # Output file list
    302     puts '# Files to process'
    303     puts fl.to_yaml
    304 end
    305 
    306 # Initialize release info
    307 if cmd == 'load' then
    308     # Load release data from file
    309     if relfile then
    310         # The user specified some file
    311         unless File.exists? relfile then
    312             STDERR.puts "Release file #{relfile} not found!"
    313             exit 1
    314         end
    315     else
    316         # Look for 'release.yaml' first
    317         if File.exists? 'release.yaml' then
    318             relfile = 'release.yaml'
    319         else
    320             # Look for any '.yaml' file
    321             relfl = Dir['*'].select {|f| File.extname(f).match /yaml/i}
    322             relfile = relfl.sort.first
    323         end
    324         unless relfile then
    325             STDERR.puts 'No release file found!'
    326             exit 1
    327         end
    328     end
    329     STDERR.puts "Loading release data from file..."
    330     rel = YAML.load File.open(relfile, 'r')
    331 elsif cmd == 'tags' then
    332     # Generate release file from audio file tags
    333     STDERR.puts "Generating release data from tags..."
    334     rel = {}
    335     rel['artist'] = []
    336     rel['album'] = []
    337     rel['year'] = []
    338     rel['masteryear'] = nil
    339     rel['genre'] = []
    340     rel['format'] = nil
    341     rel['comment'] = []
    342     rel['images'] = nil
    343     rel['tracklist'] = []
    344     # Populate tracklist
    345     fl.each do |fname|
    346         TagLib::FileRef.open(fname) do |f|
    347             trk = {}
    348             trk['pos'] = f.tag.track
    349             trk['artist'] = f.tag.artist
    350             trk['title'] = f.tag.title
    351             rel['tracklist'] << trk
    352             # Make lists and flatten afterwards
    353             rel['artist'] << f.tag.artist
    354             rel['album'] << f.tag.album
    355             rel['year'] << f.tag.year
    356             rel['genre'] << f.tag.genre
    357             rel['comment'] << f.tag.comment
    358         end
    359     end
    360     if rel['artist'].uniq.length == 1 then
    361         # Single-artist release
    362         rel['artist'] = rel['artist'].first
    363         rel['tracklist'].each do |trk|
    364             trk.delete 'artist'
    365         end
    366     else
    367         rel['artist'] = 'Various'
    368     end
    369     # These should be the same on all files
    370     rel['album'] = flatten_if_one rel['album']
    371     rel['year'] = flatten_if_one rel['year']
    372     rel['genre'] = flatten_if_one rel['genre']
    373     rel['comment'] = flatten_if_one rel['comment']
    374     # Assumptions
    375     rel['masteryear'] = rel['year']
    376     rel['format'] = 'CD'
    377 else
    378     # Get release data from Discogs
    379     STDERR.puts "Getting release data from Discogs..."
    380     r = YAML.load(open("https://api.discogs.com/releases/#{relid}#{urlopts}",
    381         Headers))
    382     mr = if r['master_id'] then
    383         YAML.load(open("https://api.discogs.com/masters/#{r['master_id']}#{urlopts}",
    384             Headers))
    385     end
    386     # Tracklist can contain dummy header tracks, strip them
    387     tl = r['tracklist'].select {|t| t['position'] != ''}
    388     # Gather release-wide data
    389     rel = {}
    390     rel['artist'] = mkartist r['artists']
    391     rel['album'] = r['title']
    392     rel['year'] = r['released']
    393     rel['masteryear'] = if mr then mr['year'] end || r['released']
    394     # Year can be full-date so keep only the year part
    395     rel['year'] = rel['year'].to_s.slice(0..3).to_i
    396     rel['masteryear'] = rel['masteryear'].to_s.slice(0..3).to_i
    397     rel['genre'] = if r['styles'] then r['styles'].first end ||
    398                    if r['genres'] then r['genres'].first end
    399     rel['format'] = mkformat r['formats'].first
    400     rel['comment'] = "Discogs: #{r['id']}"
    401     imgs = getimages(r)
    402     rel['images'] = if imgs then imgs.first(cfg['nimg']) end
    403     rel['tracklist'] = []
    404     # Populate tracklist
    405     tl.each do |s|
    406         trk = {}
    407         trk['pos'] = s['position']
    408         trk['artist'] = mkartist s['artists'] if s['artists']
    409         trk['title'] = s['title']
    410         rel['tracklist'] << trk
    411     end
    412 end
    413 
    414 # Output release info
    415 puts '# Release data'
    416 puts rel.to_yaml
    417 if cmd == 'dump' or cmd == 'tags' then
    418     STDERR.puts "Save rdata to #{relfile}"
    419     unless fake
    420         File.open(relfile, 'w') do |f|
    421             f.puts rel.to_yaml
    422         end
    423     end
    424     exit
    425 end
    426 
# Variables for use in templates
# The 'track', 'image', 'rdata' and 'after' settings are eval'ed in this
# scope, so these local names are part of the template interface and must
# not be renamed.
mdir = cfg['mdir']          # expanded music directory
ba = rel['artist']          # release artist
b = rel['album']            # album title
y = rel['year']             # release year
my = rel['masteryear']      # master release year
g = rel['genre']            # genre
v = rel['format']           # format string
c = rel['comment']          # comment
# mkname'd counterparts, as used for paths in the default templates
fba = mkname ba
fb = mkname b
fv = mkname (mkshort v)     # format is abbreviated first
# Internal use only
tl = rel['tracklist']
    441 
    442 # Sanity checks
    443 if tl.length != fl.length then
    444     puts "Found #{tl.length} tracks for #{fl.length} music files."
    445     if fl.length < tl.length then
    446         # Limit entries to number of files
    447         tl = tl.first fl.length
    448         print "Use only the first #{fl.length} entries? [y/N] "
    449     else
    450         # Limit files to available tracks
    451         fl = fl.first tl.length
    452         print "Use only the first #{fl.length} files? [y/N] "
    453     end
    454     res = STDIN.readline.strip
    455     exit unless res == 'y'
    456 end
    457 
    458 # First pass decides zero padding and file numbering
    459 zpad_disc = 0
    460 zpad_num = 0
    461 tl.each do |trk|
    462     disc, num = mkdiscnum trk['pos'].to_s
    463     if zpad_disc < disc.length then zpad_disc = disc.length end
    464     if zpad_num < num.length then zpad_num = num.length end
    465     trk['disc'] = disc
    466     trk['num'] = num
    467 end
    468 tl.each do |trk|
    469     trk['disc'] = trk['disc'].rjust(zpad_disc, '0')
    470     trk['num'] = trk['num'].rjust(zpad_num, '0')
    471 end
    472 
# Loop over the music files and
#   1. Copy them over with proper names
#   2. Clear all tags and stored images
#   3. Fix the tags on the new files
# Files and tracklist entries are paired by position: the n-th file gets
# the n-th entry of tl.
tn = 0
fl.each do |ofname|
    tn = tn.next
    trk = tl[tn - 1]
    # Use track artist for compilations, fallback to release
    a = trk['artist'] || rel['artist']
    t = trk['title']
    # mkname'd artist/title/disc for use in the file name
    fa = mkname a
    ft = mkname t
    d = trk['disc']
    n = trk['num']
    fd = mkname d
    # Lowercase extension without the dot
    x = File.extname(ofname).delete('.').downcase
    # The track template is a Ruby string literal evaluated in this scope,
    # so it sees the locals above (a, t, fa, ft, d, n, fd, x) as well as the
    # release-wide template variables
    nfname = eval cfg['track']
    # Add filename to track descriptor
    trk['file'] = nfname
    STDERR.puts "Copy track to #{nfname}"
    unless fake
        # Copy
        FileUtils.makedirs(File.dirname nfname)
        FileUtils.copy(ofname, nfname)
        # Clear
        rmtags nfname
        # Fix
        TagLib::FileRef.open(nfname) do |f|
            f.tag.artist = a
            f.tag.album = b
            f.tag.title = t
            # The track tag is the running file count, not trk['num']
            f.tag.track = tn
            f.tag.year = y
            f.tag.genre = g
            f.tag.comment = c
            f.save
        end
    end
end
    513 
    514 # Also save the first image of the artwork
    515 zz = '0' * (tl.first['disc'] + tl.first['num']).length
    516 if rel['images'] then
    517     relimgs = []
    518     rel['images'].each_with_index do |imgurl, idx|
    519         pad = rel['images'].length.to_s.length
    520         i = idx.to_s.rjust(pad, '0')
    521         if rel['images'].length == 1
    522             i = ''
    523         end
    524         # The variable i can be used in the image template
    525         imgname = eval cfg['image']
    526         STDERR.puts "Save image to #{imgname}"
    527         unless fake
    528             # Relative path or URL
    529             if File.exists? imgurl
    530                 img = open(imgurl).read
    531             else
    532                 img = open(imgurl, Headers).read
    533             end
    534             File.open(imgname, 'wb').write img
    535             # Update to local relative path now
    536             relimgs << (File.basename imgname)
    537         end
    538     end
    539     rel['images'] = relimgs
    540 end
    541 # Also save the release file for future use
    542 relfile = eval cfg['rdata']
    543 STDERR.puts "Save rdata to #{relfile}"
    544 unless fake
    545     # Sort tracklist in filename order
    546     rel['tracklist'].sort_by! {|s| s['file']}
    547     # Delete temporary data
    548     rel['tracklist'].each do |s|
    549         s.delete 'file'
    550         s.delete 'disc'
    551         s.delete 'num'
    552     end
    553     File.open(relfile, 'w') do |f|
    554         f.puts rel.to_yaml
    555     end
    556 end
    557 
    558 # Execute command if provided
    559 if cfg['after'] then
    560     run = eval cfg['after']
    561     STDERR.puts "Executing #{run}"
    562     unless fake
    563         puts `#{run}`
    564     end
    565 end
    566 
    567 # vim:set ts=4 sw=4 et: