diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 0000000..7d60b4d --- /dev/null +++ b/NOTES.md @@ -0,0 +1,144 @@ +# Notes for feature/listing PR + +## TODO + +*Move to Document as done.* + +- Nomenclature, start,end,newest +- `navLeft()`: stabilize and accelerate + - flag `-list` + - remove `WaitReady("body",..)` + - pass in prevousLocation (&string ?) + - throw error if loc==prevLoc + - exponential backoff (capped), timeout (default with param?) +- `navN()` batch timing and reload, `-batch` batchSize flag, default 1000 +- flags: `-all`, `-list`, `-batch`, `-vt`, `-verify` +- flag `-vt`: (verboseTiming) Marginal Latency/Batch, navLeft: log.warning >3 iterations or >1s,1m +- skip if already downloaded (-optimistic) +- Termination criteria: lastPhoto(ctx) + - lastPhoto: explain Selector + - setup After Auth + - add to `navN()` +- Future listing with DOM selectors, and scrolling + +## Starting another profile + +```bash +'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome' --user-data-dir=/var/folders/bw/7rvbq3q92g5bn4lv4hrh5qv40000gn/T/gphotos-cdp +``` + +## Watching + +- Show Histograms of navLeft + +```bash +cd ~/Downloads +for d in gphotos-cdp*; do echo $d $(find $d -type f|wc -l); done +while true; do cat gphotos-cdp/.lastdone ; echo; sleep 1; done +``` + +## Optimizing total time (peru) + +- document.querySelector('a[href^="./photo/"]').href; // first photo +- document.querySelector('[aria-label="View previous photo"]') +- document.querySelector('[aria-label="View next photo"]'); +- Pass in prevLocation to NavLeft +- ? Remove chromedp.WaitReady("body", chromedp.ByQuery) + +### An Argument for Batching + +```bash +$ time ./gphotos-cdp -dev -n 6001 -list +21:35:11 Session Dir: /var/folders/bw/7rvbq3q92g5bn4lv4hrh5qv40000gn/T/gphotos-cdp +21:35:11 Download Dir: /Users/daniel/Downloads/gphotos-cdp +21:35:14 Last Photo: ./photo/AF1QipMWCRwtfPHED45inXlRNjeg28oyrfhW5UA60Qjg (first on Landing Page) +21:35:19 Nav to the end sequence is started because location is https://photos.google.com/photo/AF1QipNS8lcfYzAxV1ji4yY5noyzUUlznuNE4h-qktrJ +21:35:19 NavToLast iteration: location is https://photos.google.com/photo/AF1QipNS8lcfYzAxV1ji4yY5noyzUUlznuNE4h-qktrJ +21:36:26 . Marginal Avg Latency (last 1000 @ 1000): 66.43ms Cumulative Avg Latency: 66.43ms +21:37:40 . Marginal Avg Latency (last 1000 @ 2000): 73.99ms Cumulative Avg Latency: 70.21ms +21:39:09 . Marginal Avg Latency (last 1000 @ 3000): 88.97ms Cumulative Avg Latency: 76.46ms +21:42:48 . Marginal Avg Latency (last 1000 @ 4000): 218.75ms Cumulative Avg Latency: 112.03ms +21:48:38 . Marginal Avg Latency (last 1000 @ 5000): 350.23ms Cumulative Avg Latency: 159.67ms +22:05:27 . Marginal Avg Latency (last 1000 @ 6000): 1008.58ms Cumulative Avg Latency: 301.16ms +22:05:27 Rate (6001): 3.32/s Avg Latency: 301.11ms +OK +1815.896s +``` + +With page reloading every 1000 images: + +```bash +$ time ./gphotos-cdp -dev -n 6001 -list +21:24:45 Session Dir: /var/folders/bw/7rvbq3q92g5bn4lv4hrh5qv40000gn/T/gphotos-cdp +21:24:45 Download Dir: /Users/daniel/Downloads/gphotos-cdp +21:24:48 Last Photo: ./photo/AF1QipMWCRwtfPHED45inXlRNjeg28oyrfhW5UA60Qjg (first on Landing Page) +21:24:52 Nav to the end sequence is started because location is https://photos.google.com/photo/AF1QipNS8lcfYzAxV1ji4yY5noyzUUlznuNE4h-qktrJ +21:24:52 NavToLast iteration: location is https://photos.google.com/photo/AF1QipNS8lcfYzAxV1ji4yY5noyzUUlznuNE4h-qktrJ +21:25:55 . Marginal Avg Latency (last 1000 @ 1000): 62.11ms Cumulative Avg Latency: 62.11ms +21:27:02 . Marginal Avg Latency (last 1000 @ 2000): 65.13ms Cumulative Avg Latency: 63.62ms +21:28:06 . Marginal Avg Latency (last 1000 @ 3000): 62.51ms Cumulative Avg Latency: 63.25ms +21:29:10 . Marginal Avg Latency (last 1000 @ 4000): 63.23ms Cumulative Avg Latency: 63.25ms +21:30:16 . Marginal Avg Latency (last 1000 @ 5000): 63.60ms Cumulative Avg Latency: 63.32ms +21:31:19 . Marginal Avg Latency (last 1000 @ 6000): 62.00ms Cumulative Avg Latency: 63.10ms +21:31:21 Rate (6001): 15.85/s Avg Latency: 63.09ms +OK +396.036s +``` + +## Just Listing + +```bash +# in batches + +time for i in `seq 1 31`; do echo =-=- iteration $i start; time ./gphotos-cdp -v -dev -n 1000; echo =-=- iteration $i done; done + +# From Scratch +2019/12/01 00:23:09 .. navLeft so far: 5m0.158025009s +2019/12/01 00:23:09 navLeft took 5m0.158049209s +2019/12/01 00:23:09 NavN iteration (7312): location is https://photos.google.com/photo/AF1QipORJSC-4iLwGPXtXiVMZsf2ZG8u6-XOegoijUDW +OK +4098.325s + +# Appended: +2019/12/01 01:49:40 .. navLeft so far: 5m0.354725675s +2019/12/01 01:49:40 navLeft took 5m0.354868401s +2019/12/01 01:49:40 NavN iteration (7493): location is https://photos.google.com/photo/AF1QipN98MoLk2V3gkGxzJuSe8nWqy6U7nudpBxLkuFs +OK +4562.572s +# Append +2019/12/01 03:12:15 navLeft took 438.758367ms +2019/12/01 03:12:15 NavN iteration (5193): location is https://photos.google.com/photo/AF1QipP0NnL5F31yWiEMgQdx75VaDXMI-099eN-ePTEx +2019/12/01 03:12:15 Marking https://photos.google.com/photo/AF1QipP0NnL5F31yWiEMgQdx75VaDXMI-099eN-ePTEx as done +^C +1087.231s + +# Append +2019/12/01 03:16:27 NavN iteration (2190): location is https://photos.google.com/photo/AF1QipPuPZJ-R6UTVgipHD8R-URdoB66hakxX0C4s1U7 +2019/12/01 03:16:27 Marking https://photos.google.com/photo/AF1QipPuPZJ-R6UTVgipHD8R-URdoB66hakxX0C4s1U7 as done +2019/12/01 03:16:27 .. navLeft so far: 7.674008ms +2019/12/01 03:16:27 .. navLeft so far: 109.60942ms +2019/12/01 03:16:27 .. navLeft so far: 262.405953ms +2019/12/01 03:16:27 .. navLeft so far: 369.708306ms +2019/12/01 03:16:27 .. navLeft so far: 477.549416ms +2019/12/01 03:16:27 .. navLeft so far: 579.120186ms +2019/12/01 03:16:28 .. navLeft so far: 686.254498ms +^C +264.910s +# Append +2019/12/01 03:20:28 NavN iteration (2574): location is https://photos.google.com/photo/AF1QipMl1tMQVHpxfkgpPMEd_Ko8eiTE8IsyvFX0f0Rf +2019/12/01 03:20:28 Marking https://photos.google.com/photo/AF1QipMl1tMQVHpxfkgpPMEd_Ko8eiTE8IsyvFX0f0Rf as done +2019/12/01 03:20:28 navLeft took 196.956415ms +^C +177.288s + +7312 + 7493 + 5193 + 2190 + 2574 + 4000 + 614 ++1000 67.465s, 76.261s, 71.233s, 74.092s ++614 + +``` + +https://photos.google.com/lr/photo/AICs3TPpjUDPZuOJ7u_AWLKaVnXW-zrVAOWbUEhdZpQWvKKczTTYYEsR4zfqSwUDULOH1s3W18TneUiB_QFDidcpKr63abnUgA + +https://photos.google.com/u/2/photo/AF1QipMWCRwtfPHED45inXlRNjeg28oyrfhW5UA60Qjg +AF1QipMWCRwtfPHED45inXlRNjeg28oyrfhW5UA60Qjg + diff --git a/main.go b/main.go index 23a59d2..faf0d8f 100644 --- a/main.go +++ b/main.go @@ -41,12 +41,16 @@ import ( ) var ( - nItemsFlag = flag.Int("n", -1, "number of items to download. If negative, get them all.") - devFlag = flag.Bool("dev", false, "dev mode. we reuse the same session dir (/tmp/gphotos-cdp), so we don't have to auth at every run.") - dlDirFlag = flag.String("dldir", "", "where to write the downloads. defaults to $HOME/Downloads/gphotos-cdp.") - startFlag = flag.String("start", "", "skip all photos until this location is reached. for debugging.") - runFlag = flag.String("run", "", "the program to run on each downloaded item, right after it is dowloaded. It is also the responsibility of that program to remove the downloaded item, if desired.") - verboseFlag = flag.Bool("v", false, "be verbose") + // TODO(daneroo): New flags after -v: order if merge? + nItemsFlag = flag.Int("n", -1, "number of items to download. If negative, get them all.") + devFlag = flag.Bool("dev", false, "dev mode. we reuse the same session dir (/tmp/gphotos-cdp), so we don't have to auth at every run.") + dlDirFlag = flag.String("dldir", "", "where to write the downloads. defaults to $HOME/Downloads/gphotos-cdp.") + startFlag = flag.String("start", "", "skip all photos until this location is reached. for debugging.") + runFlag = flag.String("run", "", "the program to run on each downloaded item, right after it is dowloaded. It is also the responsibility of that program to remove the downloaded item, if desired.") + verboseFlag = flag.Bool("v", false, "be verbose") + verboseTimingFlag = flag.Bool("vt", false, "be verbose about timing") + listFlag = flag.Bool("list", false, "Only list, do not download any images") + allFlag = flag.Bool("all", false, "Ignore -start and /.lastDone, start from oldest photo, implied by -list") ) var tick = 500 * time.Millisecond @@ -60,6 +64,13 @@ func main() { log.Print("-start only allowed in dev mode") return } + if *listFlag { + *allFlag = true + } + if *startFlag != "" && *allFlag { + log.Print("-start is ignored if -all (implied by -list)") + return + } s, err := NewSession() if err != nil { log.Print(err) @@ -68,6 +79,7 @@ func main() { defer s.Shutdown() log.Printf("Session Dir: %v", s.profileDir) + log.Printf("Download Dir: %v", s.dlDir) if err := s.cleanDlDir(); err != nil { log.Print(err) @@ -258,6 +270,12 @@ func (s *Session) login(ctx context.Context) error { // 2) if the last session marked what was the most recent downloaded photo, it navigates to it // 3) otherwise it jumps to the end of the timeline (i.e. the oldest photo) func (s *Session) firstNav(ctx context.Context) error { + + // fetch lastPhoto before bavigating to specific photoURL + if err := lastPhoto(ctx); err != nil { + return err + } + if *startFlag != "" { chromedp.Navigate(*startFlag).Do(ctx) chromedp.WaitReady("body", chromedp.ByQuery).Do(ctx) @@ -280,6 +298,25 @@ func (s *Session) firstNav(ctx context.Context) error { return nil } +func lastPhoto(ctx context.Context) error { + // This should be our TerminationCriteria + // extract most recent photo URL + // expr := `document.querySelector('a[href^="./photo/"]').href;` // first photo + // This selector matches elements + // - where the ^= operator matches prefix of the href attribute + sel := `a[href^="./photo/"]` // first photo on the landing page + var href string + ok := false + if err := chromedp.AttributeValue(sel, "href", &href, &ok).Do(ctx); err != nil { + return err + } + if !ok { + return errors.New("firstNav: Unable to find most recent photo") + } + log.Printf("Last Photo: %s (first on Landing Page)", href) + return nil +} + // navToEnd waits for the page to be ready to receive scroll key events, by // trying to select an item with the right arrow key, and then scrolls down to the // end of the page, i.e. to the oldest items. @@ -355,6 +392,7 @@ func navToLast(ctx context.Context) error { if location == prevLocation { break } + log.Printf("NavToLast iteration: location is %v", location) prevLocation = location } return nil @@ -375,9 +413,49 @@ func doRun(filePath string) error { } // navLeft navigates to the next item to the left -func navLeft(ctx context.Context) error { +func navLeft(ctx context.Context, prevLocation *string) error { + start := time.Now() + deadline := start.Add(5 * time.Minute) // sleep max of 5 minutes + maxBackoff := 500 * time.Millisecond + backoff := 10 * time.Millisecond // exp backoff: 10,20,40,..,maxBackoff,maxBackoff,.. + + // var prevLocation string + var location string + chromedp.KeyEvent(kb.ArrowLeft).Do(ctx) - chromedp.WaitReady("body", chromedp.ByQuery) + // I don't think this actually waits for anything, body is already ready + // ...usualy takes <10us + // chromedp.WaitReady("body", chromedp.ByQuery) + + n := 0 + for { + if err := chromedp.Location(&location).Do(ctx); err != nil { + return err + } + n++ + if location != *prevLocation { + break + } + + // Should return an error, but this is conflated with Termination Condition + if time.Now().After(deadline) { + log.Printf("NavLeft timed out after: %s (%d)", time.Since(start), n) + break + } + + time.Sleep(backoff) + if backoff < maxBackoff { + // calculate next exponential backOff (capped to maxBackoff) + backoff = backoff * 2 + if backoff > maxBackoff { + backoff = maxBackoff + } + } + } + elapsed := time.Since(start) + if *verboseTimingFlag && (n > 10 || elapsed.Seconds() > 10) { + log.Printf(". navLeft n:%d elapsed: %s", n, elapsed) + } return nil } @@ -419,6 +497,7 @@ func startDownload(ctx context.Context) error { for _, ev := range []*input.DispatchKeyEventParams{&down, &up} { if *verboseFlag { log.Printf("Event: %+v", *ev) + // log.Printf("Event: %+v", (*ev).Type) } if err := ev.Do(ctx); err != nil { return err @@ -532,8 +611,12 @@ func (s *Session) navN(N int) func(context.Context) error { return nil } var location, prevLocation string + totalDuration := 0.0 + batchDuration := 0.0 + batchSize := 1000 for { + start := time.Now() if err := chromedp.Location(&location).Do(ctx); err != nil { return err } @@ -541,20 +624,58 @@ func (s *Session) navN(N int) func(context.Context) error { break } prevLocation = location - filePath, err := s.dlAndMove(ctx, location) - if err != nil { - return err - } - if err := doRun(filePath); err != nil { - return err + + if !*listFlag { + filePath, err := s.dlAndMove(ctx, location) + if err != nil { + return err + } + if err := doRun(filePath); err != nil { + return err + } + } else { + if *verboseFlag { + log.Printf("Listing (%d): %v", n, location) + } } + n++ if N > 0 && n >= N { break } - if err := navLeft(ctx); err != nil { + // better termination - print rate befaore leaving? + if err := navLeft(ctx, &prevLocation); err != nil { return err } + totalDuration += time.Since(start).Seconds() + batchDuration += time.Since(start).Seconds() + if n%batchSize == 0 { + + // This is where we reload the page - this reduces accumulated latency significantly + // Page reload taked about 1.3 seconds + reloadStart := time.Now() + if err := chromedp.Reload().Do(ctx); err != nil { + if *verboseTimingFlag { + log.Printf(". Avg Latency (last %d @ %d): Marginal:%.2fms Cumulative: %.2fms", + batchSize, n, batchDuration*1000.0/float64(batchSize), totalDuration*1000.0/float64(n)) + } + log.Printf("Failed to reload Page at n:%d", n) + + return err + } + + if *verboseTimingFlag { + log.Printf(". Avg Latency (last %d @ %d): Marginal:%.2fms Cumulative: %.2fms Page Reloaded:%s", + batchSize, n, batchDuration*1000.0/float64(batchSize), totalDuration*1000.0/float64(n), time.Since(reloadStart)) + } + if *verboseFlag { + log.Printf("Reloaded page at: %s in %s", location, time.Since(reloadStart)) + } + batchDuration = 0.0 // reset Statistics + } + } + if *verboseTimingFlag { + log.Printf("Rate (%d): %.2f/s Avg Latency: %.2fms", n, float64(n)/totalDuration, totalDuration*1000.0/float64(n)) } return nil }