| 1 |
// extract.cpp: multi-format extractor |
| 2 |
|
| 3 |
// Copyright Takeshi Mouri 2006-2018. |
| 4 |
// Distributed under the Boost Software License, Version 1.0. |
| 5 |
// (See accompanying file LICENSE_1_0.txt or copy at |
| 6 |
// http://www.boost.org/LICENSE_1_0.txt) |
| 7 |
|
| 8 |
// See http://hamigaki.sourceforge.jp/libs/archivers for library home page. |
| 9 |
|
| 10 |
|
| 11 |
// Security warning: |
| 12 |
// This program does not fully check the validity of paths in the archive. |
| 13 |
// See http://www.forest.impress.co.jp/article/2004/07/30/arcsecurity.html . |
| 14 |
// (The above link is a Japanese-language site) |
| 15 |
|
| 16 |
#include <hamigaki/archivers/lzh_file.hpp> |
| 17 |
#include <hamigaki/archivers/tbz2_file.hpp> |
| 18 |
#include <hamigaki/archivers/tgz_file.hpp> |
| 19 |
#include <hamigaki/archivers/zip_file.hpp> |
| 20 |
#include <hamigaki/filesystem/operations.hpp> |
| 21 |
#include <boost/algorithm/string/predicate.hpp> |
| 22 |
#include <boost/filesystem/convenience.hpp> |
| 23 |
#include <boost/filesystem/operations.hpp> |
| 24 |
#include <boost/iostreams/copy.hpp> |
| 25 |
#include <clocale> |
| 26 |
#include <iostream> |
| 27 |
#include <memory> |
| 28 |
#include <stdexcept> |
| 29 |
|
| 30 |
namespace ar = hamigaki::archivers; |
| 31 |
namespace fs_ex = hamigaki::filesystem; |
| 32 |
namespace io_ex = hamigaki::iostreams; |
| 33 |
namespace algo = boost::algorithm; |
| 34 |
namespace fs = boost::filesystem; |
| 35 |
namespace io = boost::iostreams; |
| 36 |
|
| 37 |
// Tells whether an archive path passes the directory-traversal check.
//
// A path is rejected when it has a root name or a root directory, or when
// any of its elements equals "..".  If HAMIGAKI_ALLOW_DIRECTORY_TRAVERSAL is
// defined, the check is compiled out and every path is accepted.
//
// ph: the path to examine.
// Returns true if the path is accepted, false otherwise.
template<class Path>
bool is_valid_path(const Path& ph)
{
#if !defined(HAMIGAKI_ALLOW_DIRECTORY_TRAVERSAL)
    const bool rooted = ph.has_root_name() || ph.has_root_directory();
    if (rooted)
        return false;

    typename Path::iterator cur = ph.begin();
    const typename Path::iterator last = ph.end();
    while (cur != last)
    {
        // A parent-directory element anywhere in the path is refused.
        if (*cur == "..")
            return false;
        ++cur;
    }
#endif
    return true;
}
| 51 |
|
| 52 |
struct entry |
| 53 |
{ |
| 54 |
fs_ex::file_type type; |
| 55 |
boost::filesystem::path path; |
| 56 |
boost::filesystem::path link_path; |
| 57 |
boost::filesystem::path hard_link_path; |
| 58 |
boost::optional<boost::uintmax_t> compressed_size; |
| 59 |
boost::optional<boost::uintmax_t> file_size; |
| 60 |
boost::optional<fs_ex::timestamp> last_write_time; |
| 61 |
boost::optional<fs_ex::timestamp> last_access_time; |
| 62 |
boost::optional<fs_ex::timestamp> last_change_time; |
| 63 |
boost::optional<fs_ex::timestamp> creation_time; |
| 64 |
boost::optional<boost::uint16_t> attributes; |
| 65 |
boost::optional<boost::uint16_t> permissions; |
| 66 |
boost::optional<boost::intmax_t> uid; |
| 67 |
boost::optional<boost::intmax_t> gid; |
| 68 |
std::string user_name; |
| 69 |
std::string group_name; |
| 70 |
std::string comment; |
| 71 |
}; |
| 72 |
|
| 73 |
// Primary template, declared only.  Each supported archive header type
// provides a specialization with a static to_entry() conversion.
template<typename Header>
struct header_traits;
| 75 |
|
| 76 |
template<> |
| 77 |
struct header_traits<ar::lha::header> |
| 78 |
{ |
| 79 |
static entry to_entry(const ar::lha::header& head) |
| 80 |
{ |
| 81 |
entry e; |
| 82 |
|
| 83 |
if (head.is_symlink()) |
| 84 |
e.type = fs_ex::symlink_file; |
| 85 |
else if (head.is_directory()) |
| 86 |
e.type = fs_ex::directory_file; |
| 87 |
else |
| 88 |
e.type = fs_ex::regular_file; |
| 89 |
|
| 90 |
e.path = head.path; |
| 91 |
e.link_path = head.link_path; |
| 92 |
e.compressed_size = head.compressed_size; |
| 93 |
e.file_size = head.file_size; |
| 94 |
|
| 95 |
if (head.timestamp) |
| 96 |
{ |
| 97 |
const ar::lha::windows::timestamp& ts = head.timestamp.get(); |
| 98 |
e.last_write_time = |
| 99 |
fs_ex::timestamp::from_windows_file_time(ts.last_write_time); |
| 100 |
e.last_access_time = |
| 101 |
fs_ex::timestamp::from_windows_file_time(ts.last_access_time); |
| 102 |
e.creation_time = |
| 103 |
fs_ex::timestamp::from_windows_file_time(ts.creation_time); |
| 104 |
} |
| 105 |
else |
| 106 |
e.last_write_time = fs_ex::timestamp::from_time_t(head.update_time); |
| 107 |
|
| 108 |
e.attributes = head.attributes; |
| 109 |
|
| 110 |
if (head.permissions) |
| 111 |
e.permissions = head.permissions.get(); |
| 112 |
|
| 113 |
if (head.owner) |
| 114 |
{ |
| 115 |
e.uid = head.owner->uid; |
| 116 |
e.gid = head.owner->gid; |
| 117 |
} |
| 118 |
|
| 119 |
e.user_name = head.user_name; |
| 120 |
e.group_name = head.group_name; |
| 121 |
e.comment = head.comment; |
| 122 |
|
| 123 |
return e; |
| 124 |
} |
| 125 |
}; |
| 126 |
|
| 127 |
template<> |
| 128 |
struct header_traits<ar::tar::header> |
| 129 |
{ |
| 130 |
static entry to_entry(const ar::tar::header& head) |
| 131 |
{ |
| 132 |
entry e; |
| 133 |
|
| 134 |
if (head.type_flag == ar::tar::type_flag::symlink) |
| 135 |
e.type = fs_ex::symlink_file; |
| 136 |
else if (head.type_flag == ar::tar::type_flag::directory) |
| 137 |
e.type = fs_ex::directory_file; |
| 138 |
else |
| 139 |
e.type = fs_ex::regular_file; |
| 140 |
|
| 141 |
e.path = head.path; |
| 142 |
if (head.type_flag == ar::tar::type_flag::link) |
| 143 |
e.hard_link_path = head.link_path; |
| 144 |
else |
| 145 |
e.link_path = head.link_path; |
| 146 |
e.compressed_size = head.file_size; |
| 147 |
e.file_size = head.file_size; |
| 148 |
|
| 149 |
e.last_write_time = head.modified_time; |
| 150 |
e.last_access_time = head.access_time; |
| 151 |
e.last_change_time = head.change_time; |
| 152 |
|
| 153 |
e.permissions = head.permissions; |
| 154 |
e.uid = head.uid; |
| 155 |
e.gid = head.gid; |
| 156 |
|
| 157 |
e.user_name = head.user_name; |
| 158 |
e.group_name = head.group_name; |
| 159 |
e.comment = head.comment; |
| 160 |
|
| 161 |
return e; |
| 162 |
} |
| 163 |
}; |
| 164 |
|
| 165 |
template<> |
| 166 |
struct header_traits<ar::zip::header> |
| 167 |
{ |
| 168 |
static entry to_entry(const ar::zip::header& head) |
| 169 |
{ |
| 170 |
entry e; |
| 171 |
|
| 172 |
if (head.is_symlink()) |
| 173 |
e.type = fs_ex::symlink_file; |
| 174 |
else if (head.is_directory()) |
| 175 |
e.type = fs_ex::directory_file; |
| 176 |
else |
| 177 |
e.type = fs_ex::regular_file; |
| 178 |
|
| 179 |
e.path = head.path; |
| 180 |
e.link_path = head.link_path; |
| 181 |
e.compressed_size = head.compressed_size; |
| 182 |
e.file_size = head.file_size; |
| 183 |
|
| 184 |
if (head.modified_time) |
| 185 |
{ |
| 186 |
e.last_write_time = |
| 187 |
fs_ex::timestamp::from_time_t(*head.modified_time); |
| 188 |
} |
| 189 |
else |
| 190 |
e.last_write_time = fs_ex::timestamp::from_time_t(head.update_time); |
| 191 |
|
| 192 |
if (head.access_time) |
| 193 |
{ |
| 194 |
e.last_access_time = |
| 195 |
fs_ex::timestamp::from_time_t(*head.access_time); |
| 196 |
} |
| 197 |
if (head.creation_time) |
| 198 |
{ |
| 199 |
e.creation_time = |
| 200 |
fs_ex::timestamp::from_time_t(*head.creation_time); |
| 201 |
} |
| 202 |
|
| 203 |
e.attributes = head.attributes; |
| 204 |
e.permissions = head.permissions; |
| 205 |
|
| 206 |
if (head.uid) |
| 207 |
e.uid = head.uid.get(); |
| 208 |
|
| 209 |
if (head.gid) |
| 210 |
e.gid = head.gid.get(); |
| 211 |
|
| 212 |
e.comment = head.comment; |
| 213 |
|
| 214 |
return e; |
| 215 |
} |
| 216 |
}; |
| 217 |
|
| 218 |
class extractor_base |
| 219 |
{ |
| 220 |
public: |
| 221 |
typedef char char_type; |
| 222 |
|
| 223 |
struct category |
| 224 |
: boost::iostreams::input |
| 225 |
, boost::iostreams::device_tag |
| 226 |
{}; |
| 227 |
|
| 228 |
virtual ~extractor_base(){} |
| 229 |
|
| 230 |
bool next_entry() |
| 231 |
{ |
| 232 |
return do_next_entry(); |
| 233 |
} |
| 234 |
|
| 235 |
entry current_entry() const |
| 236 |
{ |
| 237 |
return do_current_entry(); |
| 238 |
} |
| 239 |
|
| 240 |
std::streamsize read(char* s, std::streamsize n) |
| 241 |
{ |
| 242 |
return do_read(s, n); |
| 243 |
} |
| 244 |
|
| 245 |
private: |
| 246 |
virtual bool do_next_entry() = 0; |
| 247 |
virtual entry do_current_entry() const = 0; |
| 248 |
virtual std::streamsize do_read(char* s, std::streamsize n) = 0; |
| 249 |
}; |
| 250 |
|
| 251 |
template<class Source> |
| 252 |
class extractor : public extractor_base |
| 253 |
{ |
| 254 |
public: |
| 255 |
explicit extractor(const Source& src) : src_(src) {} |
| 256 |
|
| 257 |
private: |
| 258 |
Source src_; |
| 259 |
|
| 260 |
bool do_next_entry() // virtual |
| 261 |
{ |
| 262 |
return src_.next_entry(); |
| 263 |
} |
| 264 |
|
| 265 |
entry do_current_entry() const // virtual |
| 266 |
{ |
| 267 |
typedef typename Source::header_type header_type; |
| 268 |
return header_traits<header_type>::to_entry(src_.header()); |
| 269 |
} |
| 270 |
|
| 271 |
std::streamsize do_read(char* s, std::streamsize n) // virtual |
| 272 |
{ |
| 273 |
return src_.read(s, n); |
| 274 |
} |
| 275 |
}; |
| 276 |
|
| 277 |
int main(int argc, char* argv[]) |
| 278 |
{ |
| 279 |
try |
| 280 |
{ |
| 281 |
if (argc != 2) |
| 282 |
{ |
| 283 |
std::cerr << "Usage: extract (filename)" << std::endl; |
| 284 |
return 1; |
| 285 |
} |
| 286 |
|
| 287 |
std::setlocale(LC_ALL, ""); |
| 288 |
|
| 289 |
std::auto_ptr<extractor_base> ext_ptr; |
| 290 |
const std::string filename(argv[1]); |
| 291 |
const fs::path ph(filename); |
| 292 |
if (!fs::exists(ph)) |
| 293 |
throw std::runtime_error("file not found"); |
| 294 |
|
| 295 |
if (algo::ends_with(filename, ".lzh")) |
| 296 |
{ |
| 297 |
ext_ptr.reset(new extractor< |
| 298 |
ar::lzh_file_source>(ar::lzh_file_source(filename))); |
| 299 |
} |
| 300 |
else if (algo::ends_with(filename, ".tar")) |
| 301 |
{ |
| 302 |
ext_ptr.reset(new extractor< |
| 303 |
ar::tar_file_source>(ar::tar_file_source(filename))); |
| 304 |
} |
| 305 |
else if (algo::ends_with(filename, ".zip")) |
| 306 |
{ |
| 307 |
ext_ptr.reset(new extractor< |
| 308 |
ar::zip_file_source>(ar::zip_file_source(filename))); |
| 309 |
} |
| 310 |
else if ( |
| 311 |
algo::ends_with(filename, ".tar.bz2") || |
| 312 |
algo::ends_with(filename, ".tbz2") || |
| 313 |
algo::ends_with(filename, ".tb2") || |
| 314 |
algo::ends_with(filename, ".tbz") ) |
| 315 |
{ |
| 316 |
ext_ptr.reset(new extractor< |
| 317 |
ar::tbz2_file_source>(ar::tbz2_file_source(filename))); |
| 318 |
} |
| 319 |
else if ( |
| 320 |
algo::ends_with(filename, ".tar.gz") || |
| 321 |
algo::ends_with(filename, ".tgz") ) |
| 322 |
{ |
| 323 |
ext_ptr.reset(new extractor< |
| 324 |
ar::tgz_file_source>(ar::tgz_file_source(filename))); |
| 325 |
} |
| 326 |
else if (algo::ends_with(filename, ".gz")) |
| 327 |
{ |
| 328 |
const std::string& leaf = ph.leaf(); |
| 329 |
if (leaf.size() < 4) |
| 330 |
throw std::runtime_error("bad filename"); |
| 331 |
|
| 332 |
std::string new_name = leaf.substr(0, leaf.size()-3); |
| 333 |
|
| 334 |
io::gzip_decompressor gzip; |
| 335 |
io_ex::file_source src(filename); |
| 336 |
char buf[4096]; |
| 337 |
std::streamsize amt = io::read(gzip, src, buf, sizeof(buf)); |
| 338 |
|
| 339 |
const std::string& org_name = gzip.file_name(); |
| 340 |
if (!org_name.empty()) |
| 341 |
new_name = org_name; |
| 342 |
|
| 343 |
std::cout << new_name << '\n'; |
| 344 |
|
| 345 |
io_ex::file_sink sink(new_name); |
| 346 |
if (amt != -1) |
| 347 |
io::write(sink, buf, amt); |
| 348 |
|
| 349 |
io::copy( |
| 350 |
io::compose(boost::ref(gzip), boost::ref(src)), |
| 351 |
boost::ref(sink) |
| 352 |
); |
| 353 |
|
| 354 |
if (gzip.mtime()) |
| 355 |
fs::last_write_time(new_name, gzip.mtime()); |
| 356 |
|
| 357 |
return 0; |
| 358 |
} |
| 359 |
else if (algo::ends_with(filename, ".bz2")) |
| 360 |
{ |
| 361 |
const std::string& leaf = ph.leaf(); |
| 362 |
if (leaf.size() < 5) |
| 363 |
throw std::runtime_error("bad filename"); |
| 364 |
|
| 365 |
const std::string new_name = leaf.substr(0, leaf.size()-4); |
| 366 |
std::cout << new_name << '\n'; |
| 367 |
|
| 368 |
io::copy( |
| 369 |
io::compose( |
| 370 |
io::bzip2_decompressor(), io_ex::file_source(filename) |
| 371 |
), |
| 372 |
io_ex::file_sink(new_name) |
| 373 |
); |
| 374 |
|
| 375 |
return 0; |
| 376 |
} |
| 377 |
else |
| 378 |
throw std::runtime_error("unsupported format"); |
| 379 |
|
| 380 |
while (ext_ptr->next_entry()) |
| 381 |
{ |
| 382 |
const entry& e = ext_ptr->current_entry(); |
| 383 |
|
| 384 |
std::cout << e.path.string() << '\n'; |
| 385 |
if (!is_valid_path(e.path)) |
| 386 |
{ |
| 387 |
std::cerr << "Warning: invalid path" << '\n'; |
| 388 |
continue; |
| 389 |
} |
| 390 |
|
| 391 |
if (!e.hard_link_path.empty()) |
| 392 |
fs_ex::create_hard_link(e.hard_link_path, e.path); |
| 393 |
else if (e.type == fs_ex::symlink_file) |
| 394 |
fs_ex::create_symlink(e.link_path, e.path); |
| 395 |
else if (e.type == fs_ex::directory_file) |
| 396 |
fs::create_directories(e.path); |
| 397 |
else |
| 398 |
{ |
| 399 |
fs::create_directories(e.path.branch_path()); |
| 400 |
|
| 401 |
io::copy( |
| 402 |
boost::ref(*ext_ptr), |
| 403 |
io_ex::file_sink( |
| 404 |
e.path.file_string(), std::ios_base::binary), |
| 405 |
1024*8 |
| 406 |
); |
| 407 |
} |
| 408 |
|
| 409 |
// Note: |
| 410 |
// The POSIX chown() clears S_ISUID/S_ISGID bits. |
| 411 |
// So, we must call owner() before calling change_permissions(). |
| 412 |
fs_ex::error_code ec; |
| 413 |
fs_ex::change_owner(e.path, e.uid, e.gid, ec); |
| 414 |
|
| 415 |
if (e.attributes) |
| 416 |
fs_ex::change_attributes(e.path, e.attributes.get(), ec); |
| 417 |
|
| 418 |
if (e.permissions) |
| 419 |
fs_ex::change_permissions(e.path, e.permissions.get(), ec); |
| 420 |
|
| 421 |
if (e.last_write_time) |
| 422 |
fs_ex::last_write_time(e.path, e.last_write_time.get()); |
| 423 |
if (e.last_access_time) |
| 424 |
fs_ex::last_access_time(e.path, e.last_access_time.get()); |
| 425 |
if (e.creation_time) |
| 426 |
fs_ex::creation_time(e.path, e.creation_time.get()); |
| 427 |
} |
| 428 |
|
| 429 |
return 0; |
| 430 |
} |
| 431 |
catch (const std::exception& e) |
| 432 |
{ |
| 433 |
std::cerr << "Error: " << e.what() << std::endl; |
| 434 |
} |
| 435 |
return 1; |
| 436 |
} |