mount your n900 on a n900 ![]()

n900 <-> n900
–> http://www.spin.de/hp/sebastian_m/blog/id/16637281#entrymain
![]() mailto, against spam!
How to download a website?
Sonstiges · 01. April 2011 05:04 · 4 Kommentare
What if you want to make an archive of a website?
What if you wanna do that daily?
So?
This tool/library is not ready for real use; it is meant to become a library, but the API is not fully finished yet. So what does this software do?
here's the code for the library 1 #include <stdio.h> 2 #include <stdbool.h> 3 #include <string.h> 4 #include <strings.h> 5 #include <libxml/parser.h> 6 #include <libxml/HTMLparser.h> 7 #include <libxml/xmlerror.h> 8 #include <curl/curl.h> 9 #include <pthread.h> // 10 11 #include "getpage.h" 12 13 14 #define FILELENGTH 150 15 #define CURL_TIMEOUT_SEC 240 16 #define SELECT_TIMEOUT_SEC 10 17 #define MAX_P_FILE_DOWNLOADS 10 18 19 //#define DEBUG 20 21 static char ALPHABET[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwx 22 yz1234567890"; 23 24 enum FILETYPE 25 { 26 IMG = 0x1, 27 STYLE = 0x2, 28 SCRIPT = 0x4, 29 IFRAME = 0x8, 30 FRAME = 0x10, 31 PDF = 0x20, 32 CSS_IMG = 0x40, 33 NONE = 0x80 34 }; 35 36 struct _replace_info 37 { 38 char *begin; 39 char end; 40 41 42 void (*userfunction) (void*, char*, int, bool); 43 void *userdata; 44 45 char *buffer; 46 int begin_progress; 47 int begin_length; 48 bool inside_gap; 49 int status; 50 }; 51 52 struct _site_files 53 { 54 char *url; 55 char *url2; 56 char *filename; 57 enum FILETYPE ft; 58 struct _site_files *next; 59 FILE *fp; 60 struct _replace_info *ri; 61 short nth_url; 62 }; 63 64 struct _site_userdata 65 { 66 //void (*site_function)(void*, const char*, ...); 67 void (*site_function)(void*, const char*, va_list); 68 void *userdata; 69 70 struct _site_files *sf; 71 char *_base_url; 72 char *_css_base_url; 73 bool _utf8_meta_set; 74 CURL *_mhnd; 75 CURL *_hnd; 76 }; 77 78 struct _css_filter_userdata 79 { 80 struct _site_userdata *su; 81 char *url; 82 }; 83 84 struct _css_filter_save_userdata 85 { 86 struct _site_userdata *su; 87 FILE *fp; 88 char *filename; 89 char *url; 90 }; 91 92 static char *_filetype_string(enum FILETYPE ft) 93 { 94 char *txt; 95 switch (ft) 96 { 97 case IMG: 98 txt = "IMG"; 99 break; 100 case CSS_IMG: 101 txt = "CSS_IMG"; 102 break; 103 case STYLE: 104 txt = "STYLE"; 105 break; 106 case SCRIPT: 107 txt = "SCRIPT"; 108 break; 109 case IFRAME: 110 txt = "IFRAME"; 111 break; 112 case FRAME: 113 
txt = "FRAME"; 114 break; 115 case PDF: 116 txt = "PDF"; 117 break; 118 case NONE: 119 txt = "OTHER"; 120 break; 121 default: 122 txt = "DEFAULT"; 123 break; 124 } 125 126 return txt; 127 } 128 129 static void _user_function(struct _site_userdata *su, const char *fmt, ...) 130 { 131 va_list ap; 132 va_start(ap, fmt); 133 su->site_function(su->userdata, fmt, ap); 134 va_end(ap); 135 } 136 137 static char *__join_together(char *a, char *b, int len_b) 138 { 139 int len_a = 0; 140 int i; 141 char *new; 142 143 if (a != NULL) 144 len_a += strlen(a); 145 146 new = realloc(a, len_b+1+len_a); 147 148 if (new != NULL) 149 { 150 for (i = 0; i < len_b; i++) 151 new[i+len_a] = b[i]; 152 153 new[len_a+len_b] = '\0'; 154 } 155 156 return new; 157 } 158 159 // return true if inside gap -> 1 160 // return false if outside gap -> -1 161 static int inline replace_step(struct _replace_info *ri, char txt) 162 { 163 if (txt == ri->begin[ri->begin_progress]) 164 ri->begin_progress++; 165 else 166 ri->begin_progress = 0; 167 168 if (ri->begin_progress == ri->begin_length) 169 { 170 ri->begin_progress = 0; 171 ri->inside_gap = true; 172 return -1; 173 } 174 175 if (ri->inside_gap) 176 { 177 if (txt == ri->end) 178 { 179 ri->inside_gap = false; 180 return -1; 181 } 182 else 183 return 1; 184 } 185 186 return -1; 187 } 188 189 static void replace(struct _replace_info *ri, char *txt, int length) 190 { 191 int i; 192 int offset = 0; 193 int status_temp = -1; 194 195 196 for (i = 0; i < length; i++) 197 { 198 status_temp = replace_step(ri, txt[i]); 199 200 if (ri->status != status_temp) 201 { 202 if (ri->buffer != NULL) 203 { 204 if (ri->status == 1) 205 { 206 ri->userfunction(ri->userdata, ri->buffer, strlen(ri-> 207 buffer), true); 208 } 209 else if (ri->status == -1) 210 { 211 ri->userfunction(ri->userdata, ri->buffer, strlen(ri-> 212 buffer), false); 213 } 214 free(ri->buffer); 215 ri->buffer = NULL; 216 } 217 218 if (ri->status == 1) 219 { 220 ri->userfunction(ri->userdata, txt+offset, 
i-offset, true); 221 } 222 else if (ri->status == -1) 223 { 224 ri->userfunction(ri->userdata, txt+offset, i-offset, false); 225 } 226 227 offset = i; 228 } 229 ri->status = status_temp; 230 } 231 232 if (offset != length) 233 { 234 if (status_temp == 1 || status_temp == -1) 235 { 236 ri->userfunction(ri->userdata, txt+offset, i-offset, ri->status 237 == 1 ? true : false); 238 } 239 else 240 { 241 if (txt[length-1] == '\0') 242 { 243 if (ri->buffer != NULL) 244 { 245 ri->userfunction(ri->userdata, ri->buffer, strlen(ri-> 246 buffer), false); 247 } 248 free(ri->buffer); 249 ri->buffer = NULL; 250 ri->userfunction(ri->userdata, txt+offset, length-offset, 251 false); 252 } 253 else 254 ri->buffer = __join_together(ri->buffer, txt+offset, length- 255 offset); 256 } 257 } 258 259 } 260 261 static void _set_chnd(CURL *hnd, char *url, void *cbfunction, void 262 *userdata) 263 { 264 curl_easy_setopt(hnd, CURLOPT_INFILESIZE_LARGE, (curl_off_t)-1); 265 curl_easy_setopt(hnd, CURLOPT_URL, url); 266 curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1); 267 curl_easy_setopt(hnd, CURLOPT_FAILONERROR, 0); 268 curl_easy_setopt(hnd, CURLOPT_USERAGENT, "libmessage - btwotch+ 269 libmessage@XXX.com"); 270 //curl_easy_setopt(hnd, CURLOPT_USERAGENT, "Mozilla/5.0 (X11; U; Linux 271 x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) 272 Chrome/6.0.472.62 Safari/534.3"); 273 curl_easy_setopt(hnd, CURLOPT_RESUME_FROM_LARGE, (curl_off_t)0); 274 curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50); 275 curl_easy_setopt(hnd, CURLOPT_SSLVERSION, 0); 276 curl_easy_setopt(hnd, CURLOPT_TIMECONDITION, 0); 277 curl_easy_setopt(hnd, CURLOPT_TIMEVALUE, 0); 278 curl_easy_setopt(hnd, CURLOPT_CUSTOMREQUEST, NULL); 279 curl_easy_setopt(hnd, CURLOPT_CONNECTTIMEOUT, CURL_TIMEOUT_SEC); 280 curl_easy_setopt(hnd, CURLOPT_TIMEOUT, CURL_TIMEOUT_SEC); 281 curl_easy_setopt(hnd, CURLOPT_HTTPAUTH, 1); 282 curl_easy_setopt(hnd, CURLOPT_ENCODING, NULL); 283 curl_easy_setopt(hnd, CURLOPT_IPRESOLVE, 0); 284 curl_easy_setopt(hnd, 
CURLOPT_IGNORE_CONTENT_LENGTH, 0); 285 curl_easy_setopt(hnd, CURLOPT_POSTREDIR, 0); 286 curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, cbfunction); 287 curl_easy_setopt(hnd, CURLOPT_WRITEDATA, userdata); 288 curl_easy_setopt(hnd, CURLOPT_FOLLOWLOCATION, 1); 289 curl_easy_setopt(hnd, CURLOPT_NOSIGNAL, 1); 290 curl_easy_setopt(hnd, CURLOPT_AUTOREFERER, 1); 291 curl_easy_setopt(hnd, CURLOPT_ENCODING, "deflate"); 292 } 293 294 static void _filename_gen(struct _site_files *first_sf, char *filename) 295 { 296 int i; 297 bool name_double; 298 struct _site_files *sf; 299 300 do 301 { 302 name_double = false; 303 srand(1337^filename[0]); 304 305 for (i = FILELENGTH/2; i < FILELENGTH; i++) 306 filename[i] = ALPHABET[rand()% (strlen(ALPHABET)-1)]; 307 308 filename[FILELENGTH-1] = '\0'; 309 310 sf = first_sf; 311 312 while (sf != NULL && sf->filename != NULL) 313 { 314 if (!strcasecmp(sf->filename, filename)) 315 name_double = true; 316 317 sf = sf->next; 318 } 319 } 320 while (name_double); 321 } 322 323 324 static char* _shrink_url(char *rurl) // remove apostrophes etc. 
325 { 326 int length; 327 328 while (rurl[0] != '\0' && rurl[0] == ' ') 329 rurl++; 330 length = strlen(rurl); 331 for (int i = 0; i < length/2; i++) 332 if (rurl[i] == '\'' || rurl[i] == '\"') 333 { 334 if (rurl[i] == rurl[length-i-1]) 335 { 336 rurl[length-i-1] = '\0'; 337 rurl++; 338 } 339 } 340 else 341 break; 342 343 344 return rurl; 345 } 346 347 static void _crap_sites_aburl(char **abs_url, CURL *hnd, char *rurl, char 348 *static_burl) 349 { 350 int abs_urllen; 351 352 if (!strncasecmp(rurl, "//", 2)) // gmx-hack 353 { 354 abs_urllen = 5+strlen(rurl)+1; 355 *abs_url = malloc(abs_urllen*sizeof(char)); 356 snprintf(*abs_url, abs_urllen, "http:%s", rurl); 357 } 358 359 } 360 361 static void _relative_aburl(char **abs_url, CURL *hnd, char *rurl, char 362 *static_burl, short nth_url) 363 { 364 int abs_urllen; 365 int domain_end = 0, i; 366 int base_len = 0; 367 char *burl = NULL; 368 369 if (strncasecmp(rurl, "http://", 7) && 370 strncasecmp(rurl, "https://", 8) && 371 strncasecmp(rurl, "ftp://", 6) && 372 strncasecmp(rurl, "file://", 7) && 373 strncasecmp(rurl, "about:", 6) && 374 strncasecmp(rurl, "javascript:", 11)) 375 { 376 if (static_burl != NULL) 377 burl = static_burl; 378 else if (curl_easy_getinfo(hnd, CURLINFO_EFFECTIVE_URL, &burl) != 379 CURLE_OK) 380 { 381 fprintf(stderr, "CURLINFO_EFFECTIVE_URL failed\n"); 382 exit(1); 383 } 384 385 if (!strncasecmp(burl, "http://", 7)) 386 domain_end = 7; 387 else if (!strncasecmp(burl, "https://", 8)) 388 domain_end = 8; 389 else if (!strncasecmp(burl, "ftp://", 6)) 390 domain_end = 6; 391 else if (!strncasecmp(burl, "file://", 6)) 392 domain_end = strlen(burl); 393 394 for (i = domain_end+1; i < strlen(burl); i++) 395 { 396 if (burl[i] == '/') 397 { 398 if (i < strlen(burl)-1) 399 if (burl[i+1] == '/') 400 continue; 401 402 if (nth_url == 1) 403 { 404 base_len = i; 405 break; 406 } 407 else 408 nth_url--; 409 } 410 } 411 if (base_len == 0) 412 base_len = strlen(burl); 413 414 abs_urllen = strlen(rurl) + 
strlen(burl) + 2; 415 *abs_url = malloc(sizeof(char)*abs_urllen); 416 snprintf(*abs_url, abs_urllen, "%.*s/%s", base_len, burl, rurl); 417 } 418 419 } 420 421 static char* _absolute_url(CURL *hnd, char *rurl, char *static_burl, short 422 nth_url) 423 { 424 char *abs_url = NULL; 425 426 if (nth_url == 1) 427 { 428 _crap_sites_aburl(&abs_url, hnd, rurl, static_burl); 429 if (abs_url != NULL) 430 return abs_url; 431 } 432 433 _relative_aburl(&abs_url, hnd, rurl, static_burl, nth_url); 434 if (abs_url != NULL) 435 return abs_url; 436 437 if (nth_url == 1) 438 { 439 int abs_urllen = strlen(rurl)+1; 440 abs_url = malloc(abs_urllen+1); 441 strncpy(abs_url, rurl, abs_urllen); 442 //abs_url = strdup(rurl); 443 } 444 445 return abs_url; 446 447 } 448 449 static char *_site_files_add(struct _site_userdata *su, char *url, char 450 *base_url, enum FILETYPE ft) 451 { 452 struct _site_files *sf = su->sf; 453 char *newurl, *newfilename, *sec_url; 454 int i; 455 int url_length; 456 int filename_length; 457 458 if (url == NULL) 459 return NULL; 460 461 url_length = strlen(url)+1; 462 463 url = _shrink_url(url); 464 newurl = _absolute_url(su->_hnd, url, (base_url == NULL) ? su->_base_url : 465 base_url, 1); 466 sec_url = _absolute_url(su->_hnd, url, (base_url == NULL) ? 
su->_base_url 467 : base_url, 2); 468 469 if (sf != NULL) 470 { 471 if (!strcmp(sf->url, newurl)) 472 { 473 free(newurl); 474 return sf->filename; 475 } 476 while (sf->next != NULL) 477 { 478 sf = sf->next; 479 if (!strcmp(sf->url, newurl)) 480 { 481 free(newurl); 482 return sf->filename; 483 } 484 } 485 sf->next = malloc(sizeof(struct _site_files)); 486 sf = sf->next; 487 sf->ri = NULL; 488 sf->next = NULL; 489 } 490 else 491 { 492 sf = malloc(sizeof(struct _site_files)); 493 sf->ri = NULL; 494 su->sf = sf; 495 sf->next = NULL; 496 } 497 498 sf->filename = NULL; 499 sf->ft = ft; 500 sf->url = newurl; 501 sf->url2 = sec_url; 502 503 504 filename_length = strlen(newurl)+1; 505 if (filename_length > FILELENGTH) 506 filename_length = FILELENGTH; 507 508 newfilename = malloc(sizeof(char)*(filename_length)); 509 strncpy(newfilename, sf->url, filename_length); 510 if (filename_length == FILELENGTH) 511 _filename_gen(sf, newfilename); 512 513 sf->filename = newfilename; 514 515 for (i = 0; i < strlen(sf->filename); i++) 516 { 517 if (sf->filename[i] == '/') 518 sf->filename[i] = '_'; 519 else if (sf->filename[i] == '?') 520 sf->filename[i] = '_'; 521 else if (sf->filename[i] == '#') 522 sf->filename[i] = '_'; 523 else if (sf->filename[i] == '@') 524 sf->filename[i] = '_'; 525 else if (sf->filename[i] == '%') 526 sf->filename[i] = '_'; 527 else if (sf->filename[i] == ':') 528 sf->filename[i] = '_'; 529 else if (sf->filename[i] == ' ') 530 sf->filename[i] = '_'; 531 } 532 533 return sf->filename; 534 } 535 536 void _save_file_css_save(void *userdata, char *gap, int length, bool gapped) 537 { 538 struct _css_filter_save_userdata *cfsu = (struct 539 _css_filter_save_userdata*) 540 userdata; 541 char *filename; 542 543 if (cfsu->fp == NULL) 544 { // first call of this func. 
545 cfsu->fp = fopen(cfsu->filename, "w"); 546 cfsu->url = NULL; 547 } 548 549 if (gapped) 550 cfsu->url = __join_together(cfsu->url, gap, length); 551 else if (!gapped && cfsu->url != NULL) 552 { 553 filename = _site_files_add(cfsu->su, cfsu->url, cfsu->su-> 554 _css_base_url, CSS_IMG); 555 fprintf(cfsu->fp, "%s", filename); 556 free(cfsu->url); 557 cfsu->url = NULL; 558 fprintf(cfsu->fp, "%.*s", length, gap); 559 } 560 else 561 fprintf(cfsu->fp, "%.*s", length, gap); 562 563 } 564 565 566 size_t _save_file_css(char *txt, size_t size, size_t nmemb, struct 567 _site_files *sf) // feed the replacer! 568 { 569 if (size == 0 && nmemb == 0 && sf->fp != NULL) 570 { 571 fclose(sf->fp); 572 } 573 else if (sf->fp == NULL) 574 sf->fp=fopen(sf->filename, "w"); 575 576 if (sf->fp == NULL) 577 { 578 perror("fopen"); 579 return 0; 580 } 581 582 replace(sf->ri, txt, size*nmemb); 583 584 return size*nmemb; 585 } 586 587 size_t _save_file(char *txt, size_t size, size_t nmemb, struct _site_files 588 *sf) 589 { 590 int i; 591 592 if (size == 0 && nmemb == 0 && sf->fp != NULL) 593 { 594 fclose(sf->fp); 595 } 596 else if (sf->fp == NULL) 597 sf->fp=fopen(sf->filename, "w"); 598 599 if (sf->fp == NULL) 600 { 601 perror("fopen"); 602 return 0; 603 } 604 605 for (i = 0; i < size*nmemb; i++) 606 fputc(txt[i], sf->fp); 607 608 return size*nmemb; 609 } 610 611 static void _set_css_ri(struct _replace_info *ri, void *userdata, void 612 *userfunction) 613 { 614 ri->begin = "url("; 615 ri->end = ')'; 616 617 ri->userfunction = userfunction; 618 ri->userdata = userdata; 619 620 ri->buffer = NULL; 621 622 ri->begin_progress = 0; 623 ri->begin_length = 4; 624 ri->inside_gap = false; 625 626 ri->status = -1; 627 } 628 629 static void _add_download_files(struct _site_files *sf, struct 630 _site_userdata *su, CURL *mhnd, short 631 nth_url) 632 { 633 struct _css_filter_save_userdata *cfsu; 634 CURL *hnd; 635 636 hnd = curl_easy_init(); 637 sf->fp = NULL; 638 sf->nth_url = nth_url; 639 640 641 #ifdef 
DEBUG 642 fprintf(stderr, "Download (%s) %s -> %s\n", _filetype_string(sf->ft), sf-> 643 url, sf->filename); 644 #endif 645 646 if (sf->ft == STYLE) 647 { 648 sf->ri = malloc(sizeof(struct _replace_info)); 649 su->_css_base_url = sf->url; 650 cfsu = malloc(sizeof(struct _css_filter_save_userdata)); 651 cfsu->fp = NULL; 652 cfsu->filename = sf->filename; 653 cfsu->su = su; 654 _set_css_ri(sf->ri, cfsu, _save_file_css_save); 655 if (nth_url == 1) 656 _set_chnd(hnd, sf->url, _save_file_css, sf); 657 else if (nth_url == 2) 658 _set_chnd(hnd, sf->url2, _save_file_css, sf); 659 } 660 else if (nth_url == 1) 661 _set_chnd(hnd, sf->url, _save_file, sf); 662 else if (nth_url == 2) 663 _set_chnd(hnd, sf->url2, _save_file, sf); 664 665 curl_easy_setopt(hnd, CURLOPT_PRIVATE, sf); 666 curl_multi_add_handle(mhnd, hnd); 667 } 668 669 670 static void _download_files(struct _site_userdata *su) 671 { 672 int handles = 1, msgs_in_queue, maxfd; 673 int iteration = 0; 674 int downloads = 0; // current downloads 675 char *curlinfo_private; 676 CURL *mhnd; 677 CURLMsg *cmsg; 678 struct _site_files *first_sf = su->sf; 679 struct _site_files *sf = first_sf; 680 struct _site_files *tmp_sf; 681 struct timeval timeout; 682 fd_set fdread, fdwrite, fderr; 683 char *burl; 684 #ifdef DEBUG 685 char *ip; 686 #endif 687 long response_code; 688 689 if (sf == NULL) 690 return; 691 mhnd = curl_multi_init(); 692 693 _add_download_files(sf, su, mhnd, 1); 694 downloads++; 695 sf = sf->next; 696 697 while (CURLM_CALL_MULTI_PERFORM == curl_multi_perform(mhnd, &handles) && 698 handles != 0); 699 700 do 701 { 702 iteration++; 703 FD_ZERO(&fdread); 704 FD_ZERO(&fdwrite); 705 FD_ZERO(&fderr); 706 timeout.tv_sec = SELECT_TIMEOUT_SEC; 707 timeout.tv_usec = 0; 708 curl_multi_fdset(mhnd, &fdread, &fdwrite, &fderr, &maxfd); 709 switch (select(maxfd+1, &fdread, &fdwrite, &fderr, &timeout)) 710 { 711 case -1: 712 #ifdef DEBUG 713 fprintf(stderr, "select bad :(\n"); 714 perror("!!! 
select failed "); 715 while ((cmsg = curl_multi_info_read(mhnd, &msgs_in_queue)) != 716 NULL) 717 { 718 if (cmsg->data.result != 0) 719 { 720 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_PRIMARY_IP, 721 &ip); 722 fprintf(stderr, "ip: %s url: %s result: %d", ip, burl, 723 cmsg->data.result); 724 if (cmsg->data.result == 7) 725 fprintf(stderr, " (couldn't connect)"); 726 fprintf(stderr, "\n"); 727 } 728 } 729 fprintf(stderr, "-----------\n"); 730 #endif 731 default: 732 while ((cmsg = curl_multi_info_read(mhnd, &msgs_in_queue)) != 733 NULL) 734 if (cmsg->msg == CURLMSG_DONE) 735 { 736 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_PRIVATE, 737 &curlinfo_private); 738 tmp_sf = (struct _site_files*)curlinfo_private; 739 if (tmp_sf->ft == CSS_IMG) 740 _save_file_css(NULL, 0, 0, tmp_sf); 741 else 742 _save_file(NULL, 0, 0, tmp_sf); 743 744 downloads--; 745 746 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_EFFECTIVE_URL, 747 &burl); 748 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_RESPONSE_CODE, 749 &response_code); 750 if (response_code > 400) 751 { 752 if (tmp_sf->nth_url == 2) 753 { 754 fprintf(stderr, "Failed (%ld): %s -> %s (%s) ", 755 response_code, burl, tmp_sf->filename, 756 _filetype_string(tmp_sf->ft)); 757 fprintf(stderr, "second url: %s\n", tmp_sf->url2); 758 } 759 else 760 _add_download_files(tmp_sf, su, mhnd, 2); 761 } 762 763 curl_easy_cleanup(cmsg->easy_handle); 764 } 765 do 766 { 767 // download 1st file 768 if (iteration == 1 && sf != NULL && downloads < 769 MAX_P_FILE_DOWNLOADS) 770 { 771 _add_download_files(sf, su, mhnd, 1); 772 downloads++; 773 } 774 775 776 while (sf != NULL && sf->next != NULL && downloads < 777 MAX_P_FILE_DOWNLOADS) // sf->next must not be NULL as 778 we are adding to the list ;) 779 { 780 _add_download_files(sf->next, su, mhnd, 1); 781 downloads++; 782 sf = sf->next; 783 } 784 } 785 while (CURLM_CALL_MULTI_PERFORM == curl_multi_perform(mhnd, 786 &handles) && handles != 0); 787 788 break; 789 790 } 791 } 792 while (handles != 0); 
793 794 while ((cmsg = curl_multi_info_read(mhnd, &msgs_in_queue)) != NULL) 795 { 796 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_PRIVATE, 797 &curlinfo_private); 798 tmp_sf = (struct _site_files*)curlinfo_private; 799 if (tmp_sf->ft == CSS_IMG) 800 _save_file_css(NULL, 0, 0, tmp_sf); 801 else 802 _save_file(NULL, 0, 0, tmp_sf); 803 downloads--; 804 805 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_EFFECTIVE_URL, &burl); 806 curl_easy_getinfo(cmsg->easy_handle, CURLINFO_RESPONSE_CODE, 807 &response_code); 808 if (response_code > 400) 809 { 810 if (tmp_sf->nth_url == 2) 811 { 812 fprintf(stderr, "Failed (%ld): %s -> %s (%s) ", 813 response_code, burl, tmp_sf->filename, 814 _filetype_string(tmp_sf->ft)); 815 fprintf(stderr, "second url: %s\n", tmp_sf->url2); 816 } 817 else 818 _add_download_files(tmp_sf, su, mhnd, 2); 819 } 820 821 curl_easy_cleanup(cmsg->easy_handle); 822 } 823 824 sf = first_sf; 825 while (sf != NULL) 826 { 827 free(sf->url); 828 free(sf->url2); 829 free(sf->filename); 830 if (sf->ri != NULL) 831 { 832 free(sf->ri->userdata); 833 free(sf->ri); 834 } 835 tmp_sf = sf; 836 sf = sf->next; 837 free(tmp_sf); 838 } 839 840 curl_multi_cleanup(mhnd); 841 842 } 843 844 void _css_filter(void *userdata, char *gap, int length, bool gapped) 845 { 846 struct _css_filter_userdata *cfu = (struct _css_filter_userdata*) 847 userdata; 848 char *filename; 849 850 if (gapped) 851 cfu->url = __join_together(cfu->url, gap, length); 852 else if (!gapped && cfu->url != NULL) 853 { 854 filename = _site_files_add(cfu->su, cfu->url, NULL, CSS_IMG); 855 _user_function(cfu->su, "%s", filename); 856 free(cfu->url); 857 cfu->url = NULL; 858 _user_function(cfu->su, "%.*s", length, gap); 859 } 860 else 861 _user_function(cfu->su, "%.*s", length, gap); 862 } 863 864 865 static void _getpage_startElementSAX (void * userData, const xmlChar * name, 866 const xmlChar ** atts) 867 { 868 int i, j; 869 char *n = (char*)name; 870 char *filename, *url; 871 struct _site_userdata *su = 
userData; 872 struct _css_filter_userdata cfu; 873 struct _replace_info ri; 874 875 876 _user_function(su, "<%s", n); 877 878 if (atts != NULL) 879 for (i = 0; atts[i] != NULL; i+=2) 880 { 881 filename = NULL; 882 883 if (!strncasecmp(n, "img", 4) && !strncasecmp((char*)atts[i], "src", 884 4)) 885 { 886 filename = _site_files_add(su, (char*)atts[i+1], NULL, IMG); 887 _user_function(su, " src=\"file:%s\"", filename); 888 } 889 else if (!strncasecmp(n, "input", 6) && !strncasecmp((char*)atts[i], 890 "src", 4)) 891 { 892 filename = _site_files_add(su, (char*)atts[i+1], NULL, IMG); 893 _user_function(su, " src=\"file:%s\"", filename); 894 } 895 else if (!strncasecmp(n, "script", 7) && !strncasecmp((char*)atts[i] 896 , "src", 4)) 897 { 898 filename = _site_files_add(su, (char*)atts[i+1], NULL, SCRIPT); 899 _user_function(su, " src=\"file:%s\"", filename); 900 } 901 else if (!strncasecmp(n, "iframe", 7) && !strncasecmp((char*)atts[i] 902 , "src", 4)) 903 { 904 filename = _site_files_add(su, (char*)atts[i+1], NULL, IFRAME); 905 _user_function(su, " src=\"file:%s\"", filename); 906 } 907 else if (!strncasecmp((char*)atts[i], "style", 6)) 908 { 909 cfu.su = su; 910 cfu.url = NULL; 911 _set_css_ri(&ri, &cfu, _css_filter); 912 _user_function(su, " style=\""); 913 replace(&ri, (char*)atts[i+1], strlen((char*)atts[i+1])); 914 if (cfu.url != NULL) 915 free(cfu.url); 916 _user_function(su, "\""); 917 filename = (void*)-1; 918 } 919 else if (!strncasecmp(n, "link", 5) && !strncasecmp((char*)atts[i], 920 "href", 5)) 921 { 922 for (j = 0; atts[j] != NULL; j+=2) 923 if (!strncasecmp((char*)atts[j], "rel", 4)) 924 { 925 if (!strncasecmp((char*)atts[j+1], "stylesheet", 11)) 926 { 927 filename = _site_files_add(su, (char*)atts[i+1], NULL, 928 STYLE); 929 _user_function(su, " href=\"file:%s\"", filename); 930 } 931 else if (!strncasecmp((char*)atts[j+1], "icon", 5)) 932 { 933 filename = _site_files_add(su, (char*)atts[i+1], NULL, 934 IMG); 935 _user_function(su, " href=\"file:%s\"", 
filename); 936 } 937 else if (!strncasecmp((char*)atts[j+1], "shortcut icon", 938 14)) 939 { 940 filename = _site_files_add(su, (char*)atts[i+1], NULL, 941 IMG); 942 _user_function(su, " href=\"file:%s\"", filename); 943 } 944 } 945 } 946 else if (!strncasecmp(n, "a", 2) && !strncasecmp((char*)atts[i], 947 "href", 5)) 948 { 949 url = _absolute_url(su->_hnd, (char*)atts[i+1], su->_base_url, 950 1); 951 _user_function(su, " href=\"%s\"", url); 952 free(url); 953 filename = (void*)-1; 954 } 955 else if (!strncasecmp(n, "base", 5) && !strncasecmp((char*)atts[i], 956 "href", 5)) 957 { 958 _user_function(su, " href=\".\""); 959 filename = (void*)-1; 960 } 961 else if (!strncasecmp(n, "form", 5) && !strncasecmp((char*)atts[i], 962 "action", 7)) 963 { 964 url = _absolute_url(su->_hnd, (char*)atts[i+1], su->_base_url, 965 1); 966 _user_function(su, " action=\"%s\"", url); 967 free(url); 968 filename = (void*)-1; 969 } 970 else if (!strncasecmp(n, "meta", 5) && !strncasecmp((char*)atts[i], 971 "http-equiv", 8) && !strncasecmp((char*)atts[i+1], 972 "Content-Type", 13)) 973 { 974 su->_utf8_meta_set = true; 975 _user_function(su, " http-equiv=\"Content-Type\" 976 content=\"text/html; charset=utf-8\""); 977 //filename = (void*)-1; 978 break; 979 } 980 981 982 if (filename == NULL) 983 _user_function(su, " %s=\"%s\"", (char*)atts[i], (char*)atts[i+1]) 984 ; 985 986 } 987 988 _user_function(su, ">"); 989 990 } 991 992 static void _getpage_endElementSAX (void * userData, const xmlChar * name) 993 { 994 char *n = (char*)name; 995 struct _site_userdata *su = userData; 996 997 if (!strncasecmp("head", n, 5) && !su->_utf8_meta_set) 998 _user_function(su, "<meta http-equiv=\"Content-Type\" 999 content=\"text/html; charset=utf-8\"/> </head>"); 1000 else if (strncasecmp("br", n, 3) && strncasecmp("img", n, 4) && 1001 strncasecmp("meta", n, 5) && strncasecmp("link", n, 5) && 1002 strncasecmp("input", n, 5)) 1003 _user_function(su, "</%s>\n", n); 1004 } 1005 1006 static void 
_getpage_charDataSAX (void * userData, const xmlChar * buffer, 1007 int length) 1008 { 1009 struct _site_userdata *su = userData; 1010 _user_function(su, "%.*s", length, buffer); 1011 } 1012 1013 static size_t _chunk_parse(void *ptr, size_t size, size_t nmemb, 1014 xmlParserCtxtPtr ctxt) 1015 { 1016 char *txt = ptr; 1017 #ifdef DEBUG 1018 FILE *fp = fopen("bare.txt", "a+"); 1019 1020 fprintf(fp, "%.*s", (int)(size*nmemb), txt); 1021 fclose(fp); 1022 #endif 1023 htmlParseChunk(ctxt, txt, size*nmemb, 0); 1024 1025 return nmemb*size; 1026 } 1027 1028 void getpage(char *url, void *site_function, void *userdata) 1029 { 1030 struct _site_userdata su; 1031 su.site_function = site_function; 1032 su.userdata = userdata; 1033 su.sf = NULL; 1034 su._utf8_meta_set = false; 1035 su._base_url = NULL; 1036 su._css_base_url = NULL; 1037 CURLcode ret; 1038 1039 htmlSAXHandler hsh; 1040 htmlParserCtxtPtr ctxt; 1041 1042 #ifdef DEBUG 1043 remove("bare.txt"); 1044 #endif 1045 1046 memset(&hsh, 0, sizeof(htmlSAXHandler)); 1047 1048 hsh.startElement = _getpage_startElementSAX; 1049 hsh.endElement = _getpage_endElementSAX; 1050 hsh.characters = _getpage_charDataSAX; 1051 1052 1053 ctxt = htmlCreatePushParserCtxt(&hsh, &su, NULL, 0, NULL, 1054 XML_CHAR_ENCODING_UTF8); 1055 htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER); 1056 1057 curl_global_init(CURL_GLOBAL_ALL); 1058 su._hnd = curl_easy_init(); 1059 _set_chnd(su._hnd, url, _chunk_parse, ctxt); 1060 ret = curl_easy_perform(su._hnd); 1061 1062 htmlParseChunk(ctxt, NULL, 0, 1); 1063 htmlFreeParserCtxt(ctxt); 1064 1065 curl_easy_getinfo(su._hnd, CURLINFO_EFFECTIVE_URL, &su._base_url); 1066 1067 #ifdef DEBUG 1068 double val; 1069 if (curl_easy_getinfo(su._hnd, CURLINFO_SPEED_DOWNLOAD, &val) == CURLE_OK) 1070 printf("Average download speed: %0.3f kbyte/sec.\n", val / 1024); 1071 #endif 1072 1073 fprintf(stderr, "Downloading files ...\n"); 1074 _download_files(&su); 1075 curl_easy_cleanup(su._hnd); 1076 // curl_global_cleanup(); 1077 } And now 
the code for the demo program: 1 #include <stdio.h> 2 #include <stdarg.h> 3 4 #include "getpage.h" 5 6 7 void site_function(void *userdata, const char* format, va_list ap) 8 { 9 FILE *fp = userdata; 10 11 vfprintf(fp, format, ap); 12 fflush(fp); 13 } 14 15 16 int main(int argc, char **argv) 17 { 18 19 FILE *fp = fopen(argv[2], "w"); 20 if (fp == NULL) 21 return -1; 22 23 getpage(argv[1], site_function, fp); 24 25 fclose(fp); 26 }
#!/bin/sh
# Runs ../getpagetest under valgrind against a list of real-world sites,
# all in parallel. For every site it writes "<site>.html" (the page) and
# "<site>.valgrind" (the leak report) into the current directory.
pages="www.google.de www.heise.de www.sueddeutsche.de www.n-tv.de www.golem.de www.faz.net www.cnn.com nytimes.com focus.de www.n24.de apple.com derstandard.at bild.de www.spin.de bbc.co.uk"
for i in $pages
do
# Alternative without valgrind: open each page as soon as it has been fetched.
#( ../getpagetest $i "$i.html" && chromium "$i.html" &)
valgrind --leak-check=full --log-file="$i.valgrind" ../getpagetest "$i" "$i.html" &
done
# printf instead of "echo -e": -e is not defined for POSIX sh, and some
# shells print it literally.
printf '\n\nWaiting ...\n\n\n'
# A bare wait blocks until every background job started above has exited.
# pidof matches by process name, so it can return nothing or PIDs that are
# not children of this shell.
wait
for i in $pages
do
echo Opening $i
chromium $i.html
doneTags: ansi c · curl · download · download website · getpage · html · libxml · libxml2 · website Receive ARP Packets via libpcapSonstiges · 14. März 2011 02:47 This is how it works: 1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <pcap.h> 4 #include <netinet/if_ether.h> 5 6 #define BUFSIZE 4096 7 #define ETH_HEADER_SIZE 14 8 9 #define ERROR_EXIT(x,y) do { fprintf(stderr, "%s: %s\n" x, y); exit(1);} 10 while (0); 11 12 void process_packet(u_char *user, const struct pcap_pkthdr *hdr, const 13 u_char *pkt) 14 { 15 struct ether_header *eth_header; 16 struct ether_arp *arp_packet; /* from if_eth.h */ 17 18 eth_header = (struct ether_header *) pkt; 19 arp_packet = (struct ether_arp *) (pkt + ETH_HEADER_SIZE); 20 21 if (ntohs (eth_header->ether_type) == ETHERTYPE_ARP) /* if it is an ARP 22 packet */ 23 { 24 printf ("Source: %d.%d.%d.%d\t\tDestination: %d.%d.%d.%d\n", 25 arp_packet->arp_spa[0], 26 arp_packet->arp_spa[1], 27 arp_packet->arp_spa[2], 28 arp_packet->arp_spa[3], 29 arp_packet->arp_tpa[0], 30 arp_packet->arp_tpa[1], 31 arp_packet->arp_tpa[2], 32 arp_packet->arp_tpa[3]); 33 } 34 35 } 36 37 int main() 38 { 39 char buf[BUFSIZE]; 40 41 bpf_u_int32 netp; 42 bpf_u_int32 maskp; 43 44 45 char *filter = "arp"; 46 struct bpf_program bpf; 47 char errbuf[PCAP_ERRBUF_SIZE]; 48 49 pcap_if_t *alldev; 50 pcap_t *handle; 51 52 if (pcap_findalldevs(&alldev, errbuf) < 0) 53 ERROR_EXIT("pcap_findalldevs", errbuf); 54 55 while (alldev != NULL) 56 { 57 printf("dev: %s\n", alldev->name); 58 alldev = alldev->next; 59 } 60 61 handle = pcap_open_live("wlan0", BUFSIZE, 0, 0, errbuf); 62 if (handle == NULL) 63 ERROR_EXIT("pcap_open_live", errbuf); 64 65 if (pcap_lookupnet ("wlan0", &netp, &maskp, errbuf) == -1) 66 ERROR_EXIT("pcap_lookupnet", errbuf); 67 68 if (pcap_compile(handle, &bpf, filter, 0, maskp) == -1) 69 ERROR_EXIT("pcap_compile", pcap_geterr(handle)); 70 71 if (pcap_setfilter(handle, &bpf) == -1) 72 ERROR_EXIT("pcap_setfilter", pcap_geterr(handle)); 73 74 
pcap_freecode(&bpf); 75 76 if (pcap_loop(handle, 10, process_packet, NULL) < 0) 77 ERROR_EXIT("pcap_setfilter", pcap_geterr(handle)); 78 79 80 } Tags: libpcap · network · pcap · sniff Send ARP Request via libnetSonstiges · 14. März 2011 02:43 · 1 Kommentar This is what it looks like: 1 #include <stdio.h> 2 #include <libnet.h> 3 4 int main() 5 { 6 // libnet 7 libnet_t *l; 8 char errbuf[LIBNET_ERRBUF_SIZE]; 9 int ret; 10 11 //arp header 12 u_int8_t *macaddr; 13 struct libnet_ether_addr *hwaddr; 14 libnet_ptag_t arp = 0; 15 //ip header 16 in_addr_t srcip, destip; 17 //eth header 18 libnet_ptag_t eth = 0; 19 20 21 l = libnet_init(LIBNET_LINK, "wlan0", errbuf); 22 23 if (l == NULL) 24 { 25 fprintf(stderr, "err: %s\n", errbuf); 26 exit(1); 27 } 28 29 hwaddr = libnet_get_hwaddr(l); 30 31 destip = inet_addr("192.168.178.1"); 32 srcip = libnet_get_ipaddr4(l); 33 macaddr = libnet_hex_aton("ff:ff:ff:ff:ff:ff", &ret); 34 35 arp = libnet_autobuild_arp(ARPOP_REQUEST, (uint8_t*)hwaddr, (uint8_t*) 36 &srcip, macaddr, (uint8_t*)&destip, l); 37 eth = libnet_autobuild_ethernet(macaddr, ETHERTYPE_ARP, l); 38 39 40 if (libnet_write(l) == -1) 41 { 42 fprintf(stderr, "err: %s\n", errbuf); 43 exit(1); 44 } 45 46 47 return 0; 48 } based on: http://commons.oreilly.com/wiki/index.php/Network_Security_Tools/Modifying_and_Hacking_Security_Tools/Writing_Packet-Injection_Tools Current favorite internet radio streamsSonstiges · 12. März 2011 18:35
P.S.: By the way, egofm wants you to send them an e-mail to get the stream URL (http://www.egofm.de/default.aspx?ID=6301)

Tags: campus crew passau · ego fm · egofm · fm4 · internet · mp3 · pls · radio · stream · stream url

ipv6 @home
Computer · 06. März 2011 20:29

I just tested traceroute6 on my parents' internet connection (1und1.de):

traceroute6 heise.de
traceroute to heise.de (2a02:2e0:3fe:100::8) from 2002:5dc4:8c4a:0:21b:63ff:fe06:99a5, 30 hops max, 24 byte packets
1 fritz.box (2002:5dc4:8c4a:0:be05:43ff:fe1e:a622) 7.611 ms 2.317 ms 2.425 ms
2 * * *
3 te3-1.c302.f.de.plusline.net (2001:7f8::3012:0:2) 18.389 ms 18.804 ms 22.642 ms
4 te2-4.c102.f.de.plusline.net (2a02:2e0:1::5) 18.808 ms 22.216 ms 22.431 ms
5 te6-2.c13.f.de.plusline.net (2a02:2e0:1::22) 18.65 ms 17.314 ms 18.374 ms
6 redirector.heise.de (2a02:2e0:3fe:100::8) 19.172 ms 19.682 ms 21.527 ms

UPDATE: some friends told me that this is probably not native IPv6 but only a tunnel :( (The source address is in 2002::/16, which is the 6to4 prefix.)

Tags: ipv6

Watching TV online - or why you should not trust "X-Forwarded-For"
Sonstiges · 06. März 2011 17:26 · 3 Kommentare

Yesterday, someone tweeted about http://wilmaa.ch - unfortunately this website is not available outside Switzerland :(

![]() wilmaa windows client
![]() wilmaa wireshark
http://en.wikipedia.org/wiki/X-Forwarded-For

So, to watch it on Linux, just use:

curl --header "X-Forwarded-For: x.x.x.x" "wilmaa-url" | mplayer -

where x.x.x.x is an IP address that geolocates to Switzerland (e.g. take one from dig suisse.ch) and wilmaa-url is one of the URLs the wilmaa PortableApp provides :)

Hint: If you don't want to install Windows, you can just use sqlite3 to get the URLs out of wilmaa's database.
|
Einträge anzeigen
Jahr 2012 (3) AbonnierenTags |