Develop and Download Open Source Software

Browse Subversion Repository

Contents of /trunk/nicovideo-dl

Parent Directory | Revision Log


Revision 24 - (show annotations) (download)
Fri Jan 25 22:58:54 2019 UTC (5 years, 4 months ago) by paulliu
File size: 20602 byte(s)
move to python3

1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright (c) 2009 Keiichiro Nagano
5 # Copyright (c) 2009 Kimura Youichi
6 # Copyright (c) 2006-2008 Ricardo Garcia Gonzalez
7 # Copyright (c) 2008 Ying-Chun Liu (PaulLiu)
8 #
9 # Permission is hereby granted, free of charge, to any person obtaining a
10 # copy of this software and associated documentation files (the "Software"),
11 # to deal in the Software without restriction, including without limitation
12 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 # and/or sell copies of the Software, and to permit persons to whom the
14 # Software is furnished to do so, subject to the following conditions:
15 #
16 # The above copyright notice and this permission notice shall be included
17 # in all copies or substantial portions of the Software.
18 #
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 # OTHER DEALINGS IN THE SOFTWARE.
26 #
27 # Except as contained in this notice, the name(s) of the above copyright
28 # holders shall not be used in advertising or otherwise to promote the
29 # sale, use or other dealings in this Software without prior written
30 # authorization.
31 #
32 import getpass
33 import http
34 import math
35 import netrc
36 import optparse
37 import os
38 import re
39 import socket
40 import string
41 import sys
42 import time
43 import urllib
44 import urllib.request
45 import cgi
46 import codecs
47 import xml.parsers.expat
48 import html
49 import json
50
# Global constants

# Program version; reported by --version and sent in the User-Agent header.
const_version = '2011.02.08'
# Project home page; sent in the User-Agent header.
const_project_url = 'http://sourceforge.jp/projects/nicovideo-dl'

# Base used when scaling byte counts to k/M/G... units.
const_1k = 1024
# Size in bytes of the first block read from the video stream.
const_initial_block_size = 10 * const_1k
# Elapsed times (seconds) below this are treated as zero to avoid dividing by ~0.
const_epsilon = 0.0001
# Default socket timeout, in seconds.
const_timeout = 120

# Canonical watch-page URL; %s is the video id.
const_video_url_str = 'https://www.nicovideo.jp/watch/%s'
# Accepts a bare id or a watch URL; group(2) is the video id.
const_video_url_re = re.compile(r'^((?:http[s]?://)?(?:\w+\.)?(?:nicovideo\.jp/(?:v/|(?:watch(?:\.php)?))?/)?(\w+))')
# Login endpoint and its form-encoded body; the two %s are account and password.
const_login_url_str = 'https://account.nicovideo.jp/api/v1/login?site=niconico&mail_or_tel=1'
const_login_post_str = 'current_form=login_form&mail_tel=%s&password=%s&login__submit=Login'
# NOTE(review): not referenced anywhere in the visible source - confirm before removing.
const_url_url_param_re = re.compile(r"url[=](http[^&]*)")
# "getflv" info endpoint; %s is the video id.
const_video_url_info_str = 'https://flapi.nicovideo.jp/api/getflv/%s'
# Captures the contents of the page's <title> element.
const_video_title_re = re.compile(r'<title>(.*)</title>', re.M | re.I)
# group(1) is the name of the first query parameter of a "smile" video URL;
# "s" and "m" select the .swf and .mp4 extensions respectively.
const_video_type_re = re.compile(r'^http[s]?://.*\.nicovideo\.jp/smile\?(.*?)=.*')

# Thread-key endpoint used for comments; %s is the video id.
# NOTE(review): plain http here while the other flapi URL uses https - confirm.
const_comment_getthreadkey_url_str = 'http://flapi.nicovideo.jp/api/getthreadkey?thread=%s'
# XML body of the comment request: thread id, user id, then optional extra attributes.
const_comment_request_str = '<thread thread="%s" version="20061206" res_from="-1000" user_id="%s"%s/>'
71
# Print error message, followed by standard advice information, and then exit
def error_advice_exit(error_text):
    """Write the error and the troubleshooting checklist to stderr, then exit.

    error_text is inserted into the first line without its trailing period.
    The process always terminates through sys.exit('\\n').
    """
    advice_lines = (
        'Error: %s.\n' % error_text,
        'Try again several times. It may be a temporary problem.\n',
        'Other typical problems:\n\n',
        '* Video no longer exists.\n',
        '* You provided the account data, but it is not valid.\n',
        '* The connection was cut suddenly for some reason.\n',
        '* Your account is free and perhaps only usable at 02:00 to 19:00 (+0900).\n',
        '* Niconico changed their system, and the program no longer works.\n',
        '\nTry to confirm you are able to view the video using a web browser.\n',
        'Use the same video URL and account information, if needed, with this program.\n',
        'When using a proxy, make sure http_proxy has http://host:port format.\n',
        'Try again several times and contact me if the problem persists.\n',
    )
    for advice_line in advice_lines:
        sys.stderr.write(advice_line)
    sys.exit('\n')
87
# Wrapper to create custom requests with typical headers
def request_create(url, extra_headers, post_data=None):
    """Build a urllib Request for url carrying the program's standard headers.

    extra_headers is None or an iterable of (name, value) pairs added after
    the standard headers.  post_data, when not None, becomes the request body.
    """
    request = urllib.request.Request(url)
    if post_data is not None:
        request.data = post_data

    standard_headers = [
        ('User-Agent', 'nicovideo-dl/%s (%s)' % (const_version, const_project_url)),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'),
        ('Accept-Language', 'en-us,en;q=0.5'),
    ]
    for header_name, header_value in standard_headers:
        request.add_header(header_name, header_value)

    if extra_headers is not None:
        for extra in extra_headers:
            request.add_header(extra[0], extra[1])
    return request
101
# Perform a request, process headers and return response
def perform_request(url, headers=None, data=None):
    """Open url through the installed urllib opener and return the response.

    headers and data are forwarded to request_create as the extra headers
    and the request body.
    """
    return urllib.request.urlopen(request_create(url, headers, data))
107
# Conditional print
def cond_print(str):
    """Write str to stdout and flush, unless --quiet or --get-url is active."""
    silenced = cmdl_opts.quiet or cmdl_opts.get_url
    if silenced:
        return
    sys.stdout.write(str)
    sys.stdout.flush()
114
# Title string normalization
def title_string_norm(title):
    """Return title made suitable for use in a file name.

    At most one known site-name suffix is removed from the end of the title,
    every os.sep is replaced by '%', and each run of whitespace is collapsed
    into a single '_'.
    """
    # NOTE(review): these literals were reconstructed from a mis-encoded copy
    # in which every UTF-8 byte had been replaced by U+FFFD; byte counts and
    # the surviving ASCII match the strings below.  The hyphen is assumed to
    # be U+2010 - confirm against a pristine copy of the file.
    hyphen = '\u2010'
    site_suffixes = (
        hyphen + 'ニコニコ動画(SP1)',
        hyphen + 'ニコニコ動画(夏)',
        hyphen + 'ニコニコ動画(秋)',  # as of Oct 2008
        hyphen + 'ニコニコ動画(冬)',  # as of 5 Dec 2008
        hyphen + 'ニコニコ動画(ββ)',  # as of 12 Dec 2008
        hyphen + 'ニコニコ動画(9)',  # as of 29 Oct 2009
        hyphen + ' ニコニコ動画(原宿)',  # as of 29 Oct 2010
    )

    title_s = title
    for suffix in site_suffixes:
        if title_s.endswith(suffix):
            title_s = title_s[:-len(suffix)]
            break
    title_s = title_s.replace(os.sep, '%')
    return '_'.join(title_s.split())
132
# Title string minimal transformation
def title_string_touch(title):
    """Return title with every path separator (os.sep) replaced by '%'."""
    pieces = title.split(os.sep)
    return '%'.join(pieces)
136
# Generic download step
def download_step(return_data_flag, step_title, step_error, url, post_data=None):
    """Fetch url, reporting progress through cond_print.

    return_data_flag -- when true, return (decoded_body, response);
                        otherwise return None.
    step_title       -- text printed before the request ("<title>... ").
    step_error       -- message handed to error_advice_exit on failure.
    post_data        -- optional request body passed on to perform_request.

    The body is decoded with the charset declared by the response, falling
    back to UTF-8 when none is declared.  Network, HTTP and decoding errors
    end the program through error_advice_exit; Ctrl-C exits quietly.
    """
    import http.client
    import urllib.error

    try:
        cond_print('%s... ' % step_title)
        response = perform_request(url, data=post_data)
        # get_content_charset() returns None when the server sends no charset;
        # decode(None) would raise TypeError, so fall back to UTF-8.
        charset = response.headers.get_content_charset() or 'utf-8'
        data = response.read().decode(charset)
        cond_print('done.\n')
        if return_data_flag:
            return data, response
        return None

    # ValueError covers UnicodeDecodeError; LookupError covers an unknown
    # charset name announced by the server.
    except (urllib.error.URLError, http.client.HTTPException, socket.error, ValueError, LookupError):
        cond_print('failed.\n')
        error_advice_exit(step_error)

    except KeyboardInterrupt:
        sys.exit('\n')
154
# Generic extract step
def extract_step(step_title, step_error, regexp, data):
    """Search data with regexp and return the first capture group.

    Progress is reported through cond_print.  When nothing matches, the
    program ends through error_advice_exit(step_error).  Ctrl-C exits quietly.
    """
    try:
        cond_print('%s... ' % step_title)
        found = regexp.search(data)
        if found is None:
            cond_print('failed.\n')
            error_advice_exit(step_error)
        captured = found.group(1)
        cond_print('done.\n')
        return captured
    except KeyboardInterrupt:
        sys.exit('\n')
171
# Calculate new block size based on previous block size
def new_block_size(before, after, bytes):
    """Return the size of the next read, given how the last read went.

    before/after are the timestamps around the previous read and bytes is
    how much it returned.  The result is the measured rate (bytes per
    second), clamped between half and double the previous amount, with a
    floor of 1.  A read that took less than const_epsilon seconds yields
    the upper bound.
    """
    lower_bound = max(bytes / 2.0, 1.0)
    upper_bound = max(bytes * 2.0, 1.0)
    elapsed = after - before
    if elapsed < const_epsilon:
        return int(upper_bound)
    measured_rate = bytes / elapsed
    return int(min(max(measured_rate, lower_bound), upper_bound))
185
# Get optimum 1k exponent to represent a number of bytes
def optimum_k_exp(num_bytes):
    """Return the largest integer e >= 0 with const_1k**e <= num_bytes.

    Values below 1 (zero, sub-byte transfer rates, negatives) return 0, so
    the result is always usable as a non-negative index into a unit table
    and the logarithm is never asked for a non-positive argument.
    """
    if num_bytes < 1:
        return 0
    # log2 is exact for powers of two, so exact multiples such as 1024**3
    # do not land just under the integer the way log(x, base) can.
    return int(math.log2(num_bytes) / math.log2(const_1k))
192
# Get optimum representation of number of bytes
def format_bytes(num_bytes):
    """Return num_bytes as a short string with a unit suffix.

    The suffix is taken from 'bkMGTPEZY' by the exponent optimum_k_exp
    returns.  With exponent 0 the number is printed as-is; otherwise it is
    scaled by const_1k**exponent and printed with two decimals.  An
    exponent beyond the suffix table ends the program.
    """
    exponent = optimum_k_exp(num_bytes)
    try:
        unit = 'bkMGTPEZY'[exponent]
    except IndexError:
        sys.exit('Error: internal error formatting number of bytes.')
    if exponent == 0:
        return '%s%s' % (num_bytes, unit)
    scaled = float(num_bytes) / float(const_1k**exponent)
    return '%.2f%s' % (scaled, unit)
205
# Calculate ETA and return it in string format as MM:SS
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as 'MM:SS'.

    start/now are timestamps, total is the expected size and current the
    amount received so far.  '--:--' is returned when nothing has been
    received yet, when less than const_epsilon seconds have elapsed, or
    when the estimate exceeds 99 minutes.
    """
    unknown = '--:--'
    elapsed = now - start
    if current == 0 or elapsed < const_epsilon:
        return unknown
    remaining_secs = int((total - current) / (float(current) / elapsed))
    minutes, seconds = divmod(remaining_secs, 60)
    return unknown if minutes > 99 else '%02d:%02d' % (minutes, seconds)
217
# Calculate speed and return it in string format
def calc_speed(start, now, bytes):
    """Return the average transfer rate between start and now, formatted.

    The rate is bytes divided by the elapsed time, rendered by
    format_bytes.  'N/A b' is returned when no bytes were received or when
    less than const_epsilon seconds have elapsed.
    """
    elapsed = now - start
    if bytes == 0 or elapsed < const_epsilon:
        return 'N/A b'
    bytes_per_second = float(bytes) / elapsed
    return format_bytes(bytes_per_second)
224
# Download comment and save to the file
def download_comment(cmdl_opts, video_url, video_filename, video_url_id, video_info_data):
    """Fetch the comments of a video and write them to a file.

    cmdl_opts       -- parsed options; comment_outfile and comment_in_xml are read.
    video_url       -- watch URL, written into the header of the text output.
    video_filename  -- video file name; the default comment file name is this
                       name plus '.xml' or '.txt'.
    video_url_id    -- video id; an all-digit id requires a thread key.
    video_info_data -- query-string encoded info data containing the
                       thread_id, user_id and ms (message server URL) fields.

    The program exits when a required field is missing or the output file
    cannot be opened.
    """
    # cgi.parse_qs no longer exists in current Python 3; urllib.parse.parse_qs
    # is the replacement.
    import urllib.parse

    def extract_param(param, key):
        # First value of key in a parse_qs result; exits when key is absent.
        if key in param:
            return param[key][0]
        error_advice_exit('cannot extract %s parameter' % key)

    def post(name, url, post_data=None):
        # urllib only accepts bytes as a request body.
        if isinstance(post_data, str):
            post_data = post_data.encode('utf-8')
        data, _response = download_step(True, 'Retrieving %s' % name, 'unable to get %s' % name, url, post_data=post_data)
        return data

    def get_thread_info():
        url = const_comment_getthreadkey_url_str % video_url_id
        info = urllib.parse.parse_qs(post('thread key', url))
        return extract_param(info, "threadkey"), extract_param(info, "force_184")

    def make_request(thread_id, user_id):
        options = ''

        if video_url_id.isdigit():
            options = ' threadkey="%s" force_184="%s"' % get_thread_info()

        return const_comment_request_str % (thread_id, user_id, options)

    def parse_comment_xml(comment_xml):
        # Return a sorted list of (vpos, text) pairs, one per <chat> element.
        comments = []
        open_chat = []  # [vpos, text_parts] of the <chat> being read, if any

        def start(name, attrs):
            if name == 'chat':
                open_chat.append([int(attrs['vpos']), []])

        def end(name):
            if name == 'chat' and open_chat:
                vpos, parts = open_chat.pop()
                comments.append((vpos, ''.join(parts)))

        def text(data):
            # expat may deliver one text node in several pieces, and also
            # reports text outside <chat>; keep only what is inside one.
            if open_chat:
                open_chat[-1][1].append(data)

        parser = xml.parsers.expat.ParserCreate()
        parser.StartElementHandler = start
        parser.EndElementHandler = end
        parser.CharacterDataHandler = text
        parser.Parse(comment_xml)

        comments.sort()

        return comments

    def write_comments(comments, f):
        f.write('comments for %s on %s\n' % (video_url, time.asctime()))

        # vpos is divided by 100 to get seconds below, so 600000 is 100 minutes;
        # use a third minutes digit only when some comment is at or past it.
        if all(vpos < 600000 for vpos, comment in comments):
            timestamp = '%02d:%02d '
        else:
            timestamp = '%03d:%02d '

        for vpos, comment in comments:
            f.write(timestamp % divmod(vpos // 100, 60))
            f.write(comment)
            f.write('\n')

    info = urllib.parse.parse_qs(video_info_data)
    thread_id = extract_param(info, "thread_id")
    user_id = extract_param(info, "user_id")
    msg_url = extract_param(info, "ms")

    request_xml = make_request(thread_id, user_id)
    comment_xml = post('comment', msg_url, post_data=request_xml)
    # download_step already returns decoded text; tolerate raw bytes as well.
    if isinstance(comment_xml, bytes):
        comment_xml = comment_xml.decode('utf-8')

    outfile = cmdl_opts.comment_outfile

    if outfile is None:
        if cmdl_opts.comment_in_xml:
            outfile = '%s.xml' % video_filename
        else:
            outfile = '%s.txt' % video_filename

    try:
        f = codecs.open(outfile, 'w', 'utf-8')
    except (IOError, OSError):
        sys.exit('Error: unable to open "%s" for writing.' % outfile)

    # Close the file even if parsing or writing fails.
    with f:
        if cmdl_opts.comment_in_xml:
            f.write(comment_xml)
        else:
            write_comments(parse_comment_xml(comment_xml), f)

    cond_print('comment saved to %s\n' % outfile)
312
# Create the command line options parser and parse command line
cmdl_usage = 'usage: %prog [options] video_url'
cmdl_version = const_version
cmdl_parser = optparse.OptionParser(
    usage=cmdl_usage,
    version=cmdl_version,
    conflict_handler='resolve',
)

# One entry per option, in the order they are registered with the parser:
# (option strings, keyword arguments for add_option).
_cmdl_option_table = [
    (('-h', '--help'), dict(action='help', help='print this help text and exit')),
    (('-v', '--version'), dict(action='version', help='print program version and exit')),
    (('-u', '--username'), dict(dest='username', metavar='USERNAME', help='account username')),
    (('-p', '--password'), dict(dest='password', metavar='PASSWORD', help='account password')),
    (('-o', '--output'), dict(dest='outfile', metavar='FILE', help='output video file name')),
    (('-q', '--quiet'), dict(action='store_true', dest='quiet', help='activates quiet mode')),
    (('-s', '--simulate'), dict(action='store_true', dest='simulate', help='do not download video')),
    (('-t', '--title'), dict(action='store_true', dest='use_title', help='use title in file name')),
    (('-l', '--literal'), dict(action='store_true', dest='use_literal', help='use literal title in file name')),
    (('-n', '--netrc'), dict(action='store_true', dest='use_netrc', help='use .netrc authentication data')),
    (('-g', '--get-url'), dict(action='store_true', dest='get_url', help='print final video URL only')),
    (('-2', '--title-too'), dict(action='store_true', dest='get_title', help='used with -g, print title too')),
    (('-c', '--get-comment'), dict(action='store_true', dest='get_comment', help='download comment')),
    (('-x', '--raw-comment'), dict(action='store_true', dest='comment_in_xml', help='save comment in raw xml')),
    (('--comment-output',), dict(dest='comment_outfile', metavar='FILE', help='output comment file name')),
]
for _option_strings, _option_kwargs in _cmdl_option_table:
    cmdl_parser.add_option(*_option_strings, **_option_kwargs)

cmdl_opts, cmdl_args = cmdl_parser.parse_args()
333
# Set socket timeout
socket.setdefaulttimeout(const_timeout)

# Get video URL: at least one positional argument is required
if not cmdl_args:
    cmdl_parser.print_help()
    sys.exit('\n')

# Check conflicting options
if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url):
    sys.stderr.write('Warning: video file name given but will not be used.\n')

if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal):
    sys.exit('Error: using the video title conflicts with using a given file name.')

if cmdl_opts.use_title and cmdl_opts.use_literal:
    sys.exit('Error: cannot use title and literal title at the same time.')

if cmdl_opts.quiet and cmdl_opts.get_url:
    sys.exit('Error: cannot be quiet and print final URL at the same time.')

# Incorrect option formatting
if cmdl_opts.username is None and cmdl_opts.password is not None:
    sys.exit('Error: password given but username is missing.')

if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None):
    sys.exit('Error: cannot use netrc and username/password at the same time.')

# store_true options are None unless given on the command line
if cmdl_opts.get_url is None and cmdl_opts.get_title is not None:
    sys.exit('Error: getting title requires getting URL.')
364
# Get account information if any
account_username = None
account_password = None

# Read the "nicovideo" machine entry from ~/.netrc when requested
if cmdl_opts.use_netrc:
    try:
        info = netrc.netrc().authenticators('nicovideo')
    except IOError:
        sys.exit('Error: unable to read .netrc file.')
    except netrc.NetrcParseError:
        sys.exit('Error: unable to parse .netrc file.')
    if info is None:
        sys.exit('Error: no authenticators for machine nicovideo.')
    netrc_username = info[0]
    netrc_password = info[2]

# Pick the credentials: command line first, then .netrc, then interactive prompt
if cmdl_opts.password is not None:
    account_username = cmdl_opts.username
    account_password = cmdl_opts.password
elif cmdl_opts.username is not None and cmdl_opts.use_netrc:
    if cmdl_opts.username != netrc_username:
        sys.exit('Error: conflicting username from .netrc and command line options.')
    account_username = cmdl_opts.username
    account_password = netrc_password
elif cmdl_opts.username is not None:
    account_username = cmdl_opts.username
    account_password = getpass.getpass('Type Niconico password and press return: ')
elif cmdl_opts.use_netrc:
    if len(netrc_username) == 0:
        sys.exit('Error: empty username in .netrc file.')
    account_username = netrc_username
    account_password = netrc_password
elif account_username is None:
    account_username = input("Type Niconico E-mail account: ")
    account_password = getpass.getpass('Type Niconico password and press return: ')
401
import urllib.parse

# Install cookie and proxy handlers.  install_opener replaces the previous
# opener, so both handlers must go into a single opener.
urllib.request.install_opener(urllib.request.build_opener(
    urllib.request.ProxyHandler(),
    urllib.request.HTTPCookieProcessor(),
))

# Log in
if account_username is not None:
    url = const_login_url_str
    # The body is form-encoded: quote the credentials so that characters
    # such as '&', '=', '+' or '%' in them do not corrupt the fields.
    post = const_login_post_str % (
        urllib.parse.quote_plus(account_username),
        urllib.parse.quote_plus(account_password),
    )
    download_step(False, 'Logging in', 'unable to log in', url, post.encode("utf-8"))
411
# Locates the HTML-escaped JSON stored in the watch page's data-api-data
# attribute.  Compiled once instead of once per video.
data_api_data_re = re.compile(r'.*data-api-data=["]([^"]*)["].*', re.M | re.I | re.MULTILINE)

# Process every video given on the command line
for video_url_cmdl in cmdl_args:
    # Verify video URL format and convert to "standard" format
    video_url_mo = const_video_url_re.match(video_url_cmdl)
    if video_url_mo is None:
        sys.exit('Error: URL does not seem to be a niconico video URL. If it is, report a bug.')
    video_url_id = video_url_mo.group(2)
    video_url = const_video_url_str % video_url_id

    video_extension = '.flv'

    # Retrieve video webpage
    video_webpage, response = download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url)

    # Reconvert URL if redirected
    if response.geturl() != video_url:
        video_url_id = const_video_url_re.match(response.geturl()).group(2)
        video_url = const_video_url_str % video_url_id

    # Extract video title if needed
    if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title:
        video_title = extract_step('Extracting video title', 'unable to extract video title', const_video_title_re, video_webpage)

    # Extract needed video URL parameters from the getflv info data
    video_url_info = const_video_url_info_str % video_url_id
    video_info_data, response = download_step(True, 'Retrieving info data', 'unable to retrieve video webpage', video_url_info)

    cond_print('Extracting URL "url" parameter... ')
    video_url_url_param = urllib.parse.parse_qs(video_info_data)
    if 'url' not in video_url_url_param:
        error_advice_exit('cannot extract url parameter')
    video_url_real = video_url_url_param['url'][0]
    cond_print('done.\n')

    # Prefer the URL embedded in the watch page when it provides one.  The
    # page goes into its own variable: video_info_data must keep the getflv
    # data because download_comment parses it below.
    watch_page_url = const_video_url_str % video_url_id
    watch_page_data, response = download_step(True, 'Retrieving info data', 'unable to retrieve video webpage', watch_page_url)
    data_api_data_match = data_api_data_re.search(watch_page_data)
    if data_api_data_match is not None:
        api_data = json.loads(html.unescape(data_api_data_match.group(1)))
        if ('video' in api_data and 'smileInfo' in api_data['video'] and 'url' in api_data['video']['smileInfo']):
            video_url_real = api_data['video']['smileInfo']['url']

    # Extract video type and modify video_extension
    video_type_mo = const_video_type_re.match(video_url_real)
    if video_type_mo:
        if video_type_mo.group(1) == "s":
            video_extension = ".swf"
        elif video_type_mo.group(1) == "m":
            video_extension = ".mp4"

    # Get output file name
    if cmdl_opts.outfile is None:
        video_filename = '%s%s' % (video_url_id, video_extension)
    else:
        video_filename = cmdl_opts.outfile

    # Rebuild filename if needed
    if cmdl_opts.use_title or cmdl_opts.use_literal:
        if cmdl_opts.use_title:
            prefix = title_string_norm(video_title)
        else:
            prefix = title_string_touch(video_title)
        video_filename = '%s-%s%s' % (prefix, video_url_id, video_extension)

    # Check name
    if not video_filename.lower().endswith(video_extension):
        sys.stderr.write('Warning: video file name does not end in %s\n' % video_extension)

    # Download and save comment
    if cmdl_opts.get_comment:
        download_comment(cmdl_opts, video_url, video_filename, video_url_id, video_info_data)

    # Retrieve video data
    try:
        cond_print('Requesting video file... ')
        video_data = perform_request(video_url_real)
        cond_print('done.\n')
        cond_print('Video data found at %s\n' % video_data.geturl())

        if cmdl_opts.get_title:
            print(video_title)

        if cmdl_opts.get_url:
            print(video_data.geturl())

        if cmdl_opts.simulate or cmdl_opts.get_url:
            continue

        try:
            video_file = open(video_filename, 'wb')
        except (IOError, OSError):
            sys.exit('Error: unable to open "%s" for writing.' % video_filename)

        # A missing header yields None here (no KeyError is raised), so test
        # for it explicitly instead of letting int(None) fail.
        content_length = video_data.info().get('Content-length')
        if content_length is None:
            video_len = None
            video_len_str = 'N/A'
        else:
            video_len = int(content_length)
            video_len_str = format_bytes(video_len)

        byte_counter = 0
        block_size = const_initial_block_size
        start_time = time.time()
        # The with statement closes the file even when the transfer fails.
        with video_file:
            while True:
                # "if video_len" also skips the percentage for a zero length,
                # which would otherwise divide by zero.
                if video_len:
                    percent = float(byte_counter) / float(video_len) * 100.0
                    percent_str = '%.1f' % percent
                    eta_str = calc_eta(start_time, time.time(), video_len, byte_counter)
                else:
                    percent_str = '---.-'
                    eta_str = '--:--'
                counter = format_bytes(byte_counter)
                speed_str = calc_speed(start_time, time.time(), byte_counter)
                cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str))

                before = time.time()
                video_block = video_data.read(block_size)
                after = time.time()
                dl_bytes = len(video_block)
                if dl_bytes == 0:
                    break
                byte_counter += dl_bytes
                video_file.write(video_block)
                # Adapt the next read size to the rate just observed
                block_size = new_block_size(before, after, dl_bytes)

        if video_len is not None and byte_counter != video_len:
            error_advice_exit('server did not send the expected amount of data')

        cond_print('done.\n')
        cond_print('Video data saved to %s\n' % video_filename)

    except (urllib.request.URLError, ValueError, http.client.HTTPException, TypeError, socket.error):
        cond_print('failed.\n')
        error_advice_exit('unable to download video data')

    except KeyboardInterrupt:
        sys.exit('\n')

# Finish
sys.exit()

Properties

Name Value
svn:executable *

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26