contrib/import-checker.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Sat, 25 Apr 2020 12:37:46 +0200
changeset 46151 c6bc77f7e593
parent 45849 c102b704edb5
child 47383 26127236b229
permissions -rwxr-xr-x
copies-rust: tokenize all paths into integer Copy information for each changesets tend to affect a small new number of path. However, each of these path might be handled a large number of time. Handling HgPathBuf (aka `Vec<u8>`) is expensive. Handling integer is cheap. With this patch we: - turn any input path into an integer "token" early, - do all the internal logic using such "token", - turn "token" back into path right before returning a result. This gives use a quite significant performance boost in our slower cases. Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.092828 s, 0.081225 s, -0.011603 s, × 0.8750, 11 µs/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.711975 s, 0.586011 s, -0.125964 s, × 0.8231, 13 µs/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.124505 s, 0.114173 s, -0.010332 s, × 0.9170, 10 µs/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.072072 s, 0.061004 s, -0.011068 s, × 0.8464, 10 µs/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.682732 s, 0.535874 s, -0.146858 s, × 0.7849, 8 µs/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.935918 s, 1.781383 s, -0.154535 s, × 0.9202, 11 µs/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.827320 s, 2.603867 s, -0.223453 s, × 0.9210, 12 µs/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 3.243010 s, 1.529120 s, -1.713890 s, × 0.4715, 15 µs/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 5.693818 s, 4.842699 s, -0.851119 s, × 0.8505, 13 µs/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 5.677655 s, 4.761732 s, -0.915923 s, × 0.8387, 13 µs/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 5.563370 s, 4.733912 s, -0.829458 s, × 0.8509, 13 µs/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.864099 s, 2.593410 s, -0.270689 s, × 0.9055, 13 µs/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 113.297287 s, 41.041198 s, -72.256089 s, × 0.3622, 179 µs/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 59.498652 s, 27.915689 s, -31.582963 s, × 0.4692, 73 µs/rev Full timing comparison between this revision and the previous one: Repo Case Source-Rev Dest-Rev # of revisions old time new time Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000042 s, 0.000042 s, +0.000000 s, × 1.0000, 42 µs/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.000104 s, 0.000110 s, +0.000006 s, × 1.0577, 18 µs/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.004913 s, 0.004918 s, +0.000005 s, × 1.0010, 4 µs/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.000191 s, 0.000195 s, +0.000004 s, × 1.0209, 21 µs/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.000050 s, 0.000049 s, -0.000001 s, × 0.9800, 49 µs/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.000112 s, 0.000112 s, +0.000000 s, × 1.0000, 16 µs/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.000288 s, 0.000324 s, +0.000036 s, × 1.1250, 324 µs/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.010411 s, 0.010611 s, +0.000200 s, × 1.0192, 1768 µs/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 0.052852 s, 0.050835 s, -0.002017 s, × 0.9618, 10 µs/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 0.092828 s, 0.081225 s, -0.011603 s, × 0.8750, 11 µs/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 0.063269 s, 0.061291 s, -0.001978 s, × 0.9687, 11 µs/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.711975 s, 0.586011 s, -0.125964 s, × 0.8231, 13 µs/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 0.012771 s, 0.012824 s, +0.000053 s, × 1.0042, 6412 µs/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 0.124505 s, 0.114173 s, -0.010332 s, × 0.9170, 10 µs/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.000082 s, 0.000085 s, +0.000003 s, × 1.0366, 42 µs/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.000111 s, 0.000108 s, -0.000003 s, × 0.9730, 54 µs/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.000171 s, 0.000175 s, +0.000004 s, × 1.0234, 58 µs/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.000708 s, 0.000719 s, +0.000011 s, × 1.0155, 79 µs/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.010608 s, 0.010175 s, -0.000433 s, × 0.9592, 7 µs/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.015635 s, 0.015569 s, -0.000066 s, × 0.9958, 10 µs/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.072072 s, 0.061004 s, -0.011068 s, × 0.8464, 10 µs/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 0.682732 s, 0.535874 s, -0.146858 s, × 0.7849, 8 µs/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.000090 s, 0.000090 s, +0.000000 s, × 1.0000, 45 µs/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.000210 s, 0.000281 s, +0.000071 s, × 1.3381, 35 µs/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.000182 s, 0.000187 s, +0.000005 s, × 1.0275, 20 µs/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.000594 s, 0.000660 s, +0.000066 s, × 1.1111, 94 µs/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.003102 s, 0.003385 s, +0.000283 s, × 1.0912, 1128 µs/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.060234 s, 0.069812 s, +0.009578 s, × 1.1590, 11635 µs/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006300 s, 0.006503 s, +0.000203 s, × 1.0322, 4 µs/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.004817 s, 0.004988 s, +0.000171 s, × 1.0355, 121 µs/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 0.065451 s, 0.063963 s, -0.001488 s, × 0.9773, 8 µs/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026282 s, 0.026225 s, -0.000057 s, × 0.9978, 42 µs/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 0.206873 s, 0.201377 s, -0.005496 s, × 0.9734, 6 µs/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 1.935918 s, 1.781383 s, -0.154535 s, × 0.9202, 11 µs/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 2.827320 s, 2.603867 s, -0.223453 s, × 0.9210, 12 µs/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.000842 s, 0.000845 s, +0.000003 s, × 1.0036, 422 µs/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.000870 s, 0.000862 s, -0.000008 s, × 0.9908, 431 µs/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.000165 s, 0.000161 s, -0.000004 s, × 0.9758, 40 µs/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.001145 s, 0.001163 s, +0.000018 s, × 1.0157, 581 µs/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 0.026500 s, 0.032414 s, +0.005914 s, × 1.2232, 32414 µs/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 0.059407 s, 0.070149 s, +0.010742 s, × 1.1808, 11691 µs/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.006325 s, 0.006526 s, +0.000201 s, × 1.0318, 4 µs/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.005171 s, 0.005187 s, +0.000016 s, × 1.0031, 126 µs/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 0.066837 s, 0.065047 s, -0.001790 s, × 0.9732, 9 µs/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.314252 s, 0.301129 s, -0.013123 s, × 0.9582, 7 µs/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.304160 s, 0.280683 s, -0.023477 s, × 0.9228, 7 µs/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 0.089223 s, 0.084897 s, -0.004326 s, × 0.9515, 9 µs/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 0.026711 s, 0.026620 s, -0.000091 s, × 0.9966, 43 µs/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 3.243010 s, 1.529120 s, -1.713890 s, × 0.4715, 15 µs/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 0.756500 s, 0.738709 s, -0.017791 s, × 0.9765, 14 µs/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 5.693818 s, 4.842699 s, -0.851119 s, × 0.8505, 13 µs/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 0.590904 s, 0.596946 s, +0.006042 s, × 1.0102, 17 µs/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 5.677655 s, 4.761732 s, -0.915923 s, × 0.8387, 13 µs/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 5.563370 s, 4.733912 s, -0.829458 s, × 0.8509, 13 µs/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 2.864099 s, 2.593410 s, -0.270689 s, × 0.9055, 13 µs/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 113.297287 s, 41.041198 s, -72.256089 s, × 0.3622, 179 µs/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 59.498652 s, 27.915689 s, -31.582963 s, × 0.4692, 73 µs/rev Full timing comparison between this revision and the filelog copy tracing. Repo Case Source-Rev Dest-Rev # of revisions filelog sidedata Difference Factor time per rev --------------------------------------------------------------------------------------------------------------------------------------------------------------- mercurial x_revs_x_added_0_copies ad6b123de1c7 39cfcef4f463 : 1 revs, 0.000903 s, 0.000042 s, -0.000861 s, × 0.0465, 41 µs/rev mercurial x_revs_x_added_x_copies 2b1c78674230 0c1d10351869 : 6 revs, 0.001861 s, 0.000110 s, -0.001751 s, × 0.0591, 18 µs/rev mercurial x000_revs_x000_added_x_copies 81f8ff2a9bf2 dd3267698d84 : 1032 revs, 0.018577 s, 0.004918 s, -0.013659 s, × 0.2647, 4 µs/rev pypy x_revs_x_added_0_copies aed021ee8ae8 099ed31b181b : 9 revs, 0.001519 s, 0.000195 s, -0.001324 s, × 0.1283, 21 µs/rev pypy x_revs_x000_added_0_copies 4aa4e1f8e19a 359343b9ac0e : 1 revs, 0.213855 s, 0.000049 s, -0.350d73 s, × 0.0002, 48 µs/rev pypy x_revs_x_added_x_copies ac52eb7bbbb0 72e022663155 : 7 revs, 0.017022 s, 0.000112 s, -0.016910 s, × 0.0065, 15 µs/rev pypy x_revs_x00_added_x_copies c3b14617fbd7 ace7255d9a26 : 1 revs, 0.019398 s, 0.000324 s, -0.019074 s, × 0.0167, 323 µs/rev pypy x_revs_x000_added_x000_copies df6f7a526b60 a83dc6a2d56f : 6 revs, 0.769467 s, 0.010611 s, -0.758856 s, × 0.0137, 1768 µs/rev pypy x000_revs_xx00_added_0_copies 89a76aede314 2f22446ff07e : 4785 revs, 1.221952 s, 0.050835 s, -1.171117 s, × 0.0416, 10 µs/rev pypy x000_revs_x000_added_x_copies 8a3b5bfd266e 2c68e87c3efe : 6780 revs, 1.304007 s, 0.081225 s, -1.222782 s, × 0.0622, 11 µs/rev pypy x000_revs_x000_added_x000_copies 89a76aede314 7b3dda341c84 : 5441 revs, 1.686610 s, 0.061291 s, -1.625319 s, × 0.0363, 11 µs/rev pypy x0000_revs_x_added_0_copies d1defd0dc478 c9cb1334cc78 : 43645 revs, 0.001107 s, 0.586011 s, +0.584904 s, × 529.36, 13 µs/rev pypy x0000_revs_xx000_added_0_copies bf2c629d0071 4ffed77c095c : 2 revs, 1.100760 s, 0.012824 s, -1.087936 s, × 0.0116, 6408 µs/rev pypy x0000_revs_xx000_added_x000_copies 08ea3258278e d9fa043f30c0 : 11316 revs, 1.350547 s, 0.114173 s, -1.236374 s, × 0.0845, 10 µs/rev netbeans x_revs_x_added_0_copies fb0955ffcbcd a01e9239f9e7 : 2 revs, 0.027864 s, 0.000085 s, -0.027779 s, × 0.0030, 42 µs/rev netbeans x_revs_x000_added_0_copies 6f360122949f 20eb231cc7d0 : 2 revs, 0.132479 s, 0.000108 s, -0.132371 s, × 0.0008, 53 µs/rev netbeans x_revs_x_added_x_copies 1ada3faf6fb6 5a39d12eecf4 : 3 revs, 0.025405 s, 0.000175 s, -0.025230 s, × 0.0068, 58 µs/rev netbeans x_revs_x00_added_x_copies 35be93ba1e2c 9eec5e90c05f : 9 revs, 0.053244 s, 0.000719 s, -0.052525 s, × 0.0135, 79 µs/rev netbeans x000_revs_xx00_added_0_copies eac3045b4fdd 51d4ae7f1290 : 1421 revs, 0.038017 s, 0.010175 s, -0.027842 s, × 0.2676, 7 µs/rev netbeans x000_revs_x000_added_x_copies e2063d266acd 6081d72689dc : 1533 revs, 0.198308 s, 0.015569 s, -0.182739 s, × 0.0785, 10 µs/rev netbeans x000_revs_x000_added_x000_copies ff453e9fee32 411350406ec2 : 5750 revs, 0.949749 s, 0.061004 s, -0.888745 s, × 0.0642, 10 µs/rev netbeans x0000_revs_xx000_added_x000_copies 588c2d1ced70 1aad62e59ddd : 66949 revs, 3.932262 s, 0.535874 s, -3.396388 s, × 0.1362, 8 µs/rev mozilla-central x_revs_x_added_0_copies 3697f962bb7b 7015fcdd43a2 : 2 revs, 0.024490 s, 0.000090 s, -0.024400 s, × 0.0036, 44 µs/rev mozilla-central x_revs_x000_added_0_copies dd390860c6c9 40d0c5bed75d : 8 revs, 0.143885 s, 0.000281 s, -0.143604 s, × 0.0019, 35 µs/rev mozilla-central x_revs_x_added_x_copies 8d198483ae3b 14207ffc2b2f : 9 revs, 0.025471 s, 0.000187 s, -0.025284 s, × 0.0073, 20 µs/rev mozilla-central x_revs_x00_added_x_copies 98cbc58cc6bc 446a150332c3 : 7 revs, 0.086013 s, 0.000660 s, -0.085353 s, × 0.0076, 94 µs/rev mozilla-central x_revs_x000_added_x000_copies 3c684b4b8f68 0a5e72d1b479 : 3 revs, 0.200726 s, 0.003385 s, -0.197341 s, × 0.0168, 1127 µs/rev mozilla-central x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 2.224171 s, 0.069812 s, -2.154359 s, × 0.0313, 11633 µs/rev mozilla-central x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.090780 s, 0.006503 s, -0.084277 s, × 0.0716, 4 µs/rev mozilla-central x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.764805 s, 0.004988 s, -0.759817 s, × 0.0065, 121 µs/rev mozilla-central x000_revs_x000_added_x000_copies 7c97034feb78 4407bd0c6330 : 7839 revs, 1.161405 s, 0.063963 s, -1.097442 s, × 0.0550, 8 µs/rev mozilla-central x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 6.816186 s, 0.026225 s, -6.789961 s, × 0.0038, 42 µs/rev mozilla-central x0000_revs_xx000_added_x000_copies f78c615a656c 96a38b690156 : 30263 revs, 3.374819 s, 0.201377 s, -3.173442 s, × 0.0596, 6 µs/rev mozilla-central x00000_revs_x0000_added_x0000_copies 6832ae71433c 4c222a1d9a00 : 153721 revs, 16.285469 s, 1.781383 s, -14.504086 s, × 0.1093, 11 µs/rev mozilla-central x00000_revs_x00000_added_x000_copies 76caed42cf7c 1daa622bbe42 : 204976 revs, 21.207733 s, 2.603867 s, -18.603866 s, × 0.1227, 12 µs/rev mozilla-try x_revs_x_added_0_copies aaf6dde0deb8 9790f499805a : 2 revs, 0.080843 s, 0.000845 s, -0.079998 s, × 0.0104, 422 µs/rev mozilla-try x_revs_x000_added_0_copies d8d0222927b4 5bb8ce8c7450 : 2 revs, 0.511068 s, 0.000862 s, -0.510206 s, × 0.0016, 430 µs/rev mozilla-try x_revs_x_added_x_copies 092fcca11bdb 936255a0384a : 4 revs, 0.021573 s, 0.000161 s, -0.021412 s, × 0.0074, 40 µs/rev mozilla-try x_revs_x00_added_x_copies b53d2fadbdb5 017afae788ec : 2 revs, 0.227726 s, 0.001163 s, -0.226563 s, × 0.0051, 581 µs/rev mozilla-try x_revs_x000_added_x000_copies 20408ad61ce5 6f0ee96e21ad : 1 revs, 1.120448 s, 0.032414 s, -1.088034 s, × 0.0289, 32381 µs/rev mozilla-try x_revs_x0000_added_x0000_copies effb563bb7e5 c07a39dc4e80 : 6 revs, 2.241713 s, 0.070149 s, -2.171564 s, × 0.0312, 11689 µs/rev mozilla-try x000_revs_xx00_added_0_copies 6100d773079a 04a55431795e : 1593 revs, 0.090633 s, 0.006526 s, -0.084107 s, × 0.0720, 4 µs/rev mozilla-try x000_revs_x000_added_x_copies 9f17a6fc04f9 2d37b966abed : 41 revs, 0.770403 s, 0.005187 s, -0.765216 s, × 0.0067, 126 µs/rev mozilla-try x000_revs_x000_added_x000_copies 1346fd0130e4 4c65cbdabc1f : 6657 revs, 1.184557 s, 0.065047 s, -1.119510 s, × 0.0549, 9 µs/rev mozilla-try x0000_revs_x_added_0_copies 63519bfd42ee a36a2a865d92 : 40314 revs, 0.085790 s, 0.301129 s, +0.215339 s, × 3.5100, 7 µs/rev mozilla-try x0000_revs_x_added_x_copies 9fe69ff0762d bcabf2a78927 : 38690 revs, 0.080616 s, 0.280683 s, +0.200067 s, × 3.4817, 7 µs/rev mozilla-try x0000_revs_xx000_added_x_copies 156f6e2674f2 4d0f2c178e66 : 8598 revs, 7.712554 s, 0.084897 s, -7.627657 s, × 0.0110, 9 µs/rev mozilla-try x0000_revs_xx000_added_0_copies 9eec5917337d 67118cc6dcad : 615 revs, 6.937294 s, 0.026620 s, -6.910674 s, × 0.0038, 43 µs/rev mozilla-try x0000_revs_xx000_added_x000_copies 89294cd501d9 7ccb2fc7ccb5 : 97052 revs, 7.712313 s, 1.529120 s, -6.183193 s, × 0.1982, 15 µs/rev mozilla-try x0000_revs_x0000_added_x0000_copies e928c65095ed e951f4ad123a : 52031 revs, 9.966910 s, 0.738709 s, -9.228201 s, × 0.0741, 14 µs/rev mozilla-try x00000_revs_x_added_0_copies 6a320851d377 1ebb79acd503 : 363753 revs, 0.090397 s, 4.842699 s, +4.752302 s, × 53.571, 13 µs/rev mozilla-try x00000_revs_x00000_added_0_copies dc8a3ca7010e d16fde900c9c : 34414 revs, 27.817167 s, 0.596946 s, -27.220221 s, × 0.0214, 17 µs/rev mozilla-try x00000_revs_x_added_x_copies 5173c4b6f97c 95d83ee7242d : 362229 revs, 0.091305 s, 4.761732 s, +4.670427 s, × 52.151, 13 µs/rev mozilla-try x00000_revs_x000_added_x_copies 9126823d0e9c ca82787bb23c : 359344 revs, 0.231183 s, 4.733912 s, +4.502729 s, × 20.476, 13 µs/rev mozilla-try x00000_revs_x0000_added_x0000_copies 8d3fafa80d4b eb884023b810 : 192665 revs, 19.830617 s, 2.593410 s, -17.237207 s, × 0.1307, 13 µs/rev mozilla-try x00000_revs_x00000_added_x0000_copies 1b661134e2ca 1ae03d022d6d : 228985 revs, 21.743873 s, 41.041198 s, +19.297325 s, × 1.8874, 179 µs/rev mozilla-try x00000_revs_x00000_added_x000_copies 9b2a99adc05e 8e29777b48e6 : 382065 revs, 25.935037 s, 27.915689 s, +1.980652 s, × 1.0763, 73 µs/rev Differential Revision: https://phab.mercurial-scm.org/D9493

#!/usr/bin/env python3

from __future__ import absolute_import, print_function

import ast
import collections
import io
import os
import sys

# Import a minimal set of stdlib modules needed for list_stdlib_modules()
# to work when run from a virtualenv.  The modules were chosen empirically
# so that the return value matches the return value without virtualenv.
if True:  # disable lexical sorting checks
    try:
        import BaseHTTPServer as basehttpserver
    except ImportError:
        basehttpserver = None
    import zlib

import testparseutil

# Whitelist of modules that symbols can be directly imported from.
allowsymbolimports = (
    '__future__',
    'bzrlib',
    'hgclient',
    'mercurial',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.interfaces',
    'mercurial.node',
    'mercurial.pycompat',
    # for revlog to re-export constant to extensions
    'mercurial.revlogutils.constants',
    'mercurial.revlogutils.flagutil',
    # for cffi modules to re-export pure functions
    'mercurial.pure.base85',
    'mercurial.pure.bdiff',
    'mercurial.pure.mpatch',
    'mercurial.pure.osutil',
    'mercurial.pure.parsers',
    # third-party imports should be directly imported
    'mercurial.thirdparty',
    'mercurial.thirdparty.attr',
    'mercurial.thirdparty.zope',
    'mercurial.thirdparty.zope.interface',
)

# Whitelist of symbols that can be directly imported.
directsymbols = ('demandimport',)

# Modules that must be aliased because they are commonly confused with
# common variables and can create aliasing and readability issues.
requirealias = {
    'ui': 'uimod',
}


def usingabsolute(root):
    """Whether absolute imports are being used."""
    if sys.version_info[0] >= 3:
        return True

    for node in ast.walk(root):
        if isinstance(node, ast.ImportFrom):
            if node.module == '__future__':
                for n in node.names:
                    if n.name == 'absolute_import':
                        return True

    return False


def walklocal(root):
    """Recursively yield all descendant nodes but not in a different scope"""
    todo = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while todo:
        node = todo.popleft()
        newscope = isinstance(node, ast.FunctionDef)
        if not newscope:
            todo.extend(ast.iter_child_nodes(node))
        yield node, newscope


def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0]  # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][:-6]
    return '.'.join(parts)


def fromlocalfunc(modulename, localmods):
    """Get a function to examine which locally defined module the
    target source imports via a specified name.

    `modulename` is an `dotted_name_of_path()`-ed source file path,
    which may have `.__init__` at the end of it, of the target source.

    `localmods` is a set of absolute `dotted_name_of_path()`-ed source file
    paths of locally defined (= Mercurial specific) modules.

    This function assumes that module names not existing in
    `localmods` are from the Python standard library.

    This function returns the function, which takes `name` argument,
    and returns `(absname, dottedpath, hassubmod)` tuple if `name`
    matches against locally defined module. Otherwise, it returns
    False.

    It is assumed that `name` doesn't have `.__init__`.

    `absname` is an absolute module name of specified `name`
    (e.g. "hgext.convert"). This can be used to compose prefix for sub
    modules or so.

    `dottedpath` is a `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. This is used to look
    module up in `localmods` again.

    `hassubmod` is whether it may have sub modules under it (for
    convenient, even though this is also equivalent to "absname !=
    dottednpath")

    >>> localmods = {'foo.__init__', 'foo.foo1',
    ...              'foo.bar.__init__', 'foo.bar.bar1',
    ...              'baz.__init__', 'baz.baz1'}
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal('foo1', 1)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2('bar2', 1)
    False
    >>> fromlocal2('bar', 2)
    ('foo.bar', 'foo.bar.__init__', True)
    """
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    prefix = '.'.join(modulename.split('.')[:-1])
    if prefix:
        prefix += '.'

    def fromlocal(name, level=0):
        # name is false value when relative imports are used.
        if not name:
            # If relative imports are used, level must not be absolute.
            assert level > 0
            candidates = ['.'.join(modulename.split('.')[:-level])]
        else:
            if not level:
                # Check relative name first.
                candidates = [prefix + name, name]
            else:
                candidates = [
                    '.'.join(modulename.split('.')[:-level]) + '.' + name
                ]

        for n in candidates:
            if n in localmods:
                return (n, n, False)
            dottedpath = n + '.__init__'
            if dottedpath in localmods:
                return (n, dottedpath, True)
        return False

    return fromlocal


def populateextmods(localmods):
    """Populate C extension modules based on pure modules"""
    newlocalmods = set(localmods)
    for n in localmods:
        if n.startswith('mercurial.pure.'):
            m = n[len('mercurial.pure.') :]
            newlocalmods.add('mercurial.cext.' + m)
            newlocalmods.add('mercurial.cffi._' + m)
    return newlocalmods


def list_stdlib_modules():
    """List the modules present in the stdlib.

    >>> py3 = sys.version_info[0] >= 3
    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods or py3
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods or py3
    True

    >>> 'cffi' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    yield '__builtin__'
    yield 'builtins'  # python3 only
    yield 'importlib.abc'  # python3 only
    yield 'importlib.machinery'  # python3 only
    yield 'importlib.util'  # python3 only
    for m in 'fcntl', 'grp', 'pwd', 'termios':  # Unix only
        yield m
    for m in 'cPickle', 'datetime':  # in Python (not C) on PyPy
        yield m
    for m in ['cffi']:
        yield m
    stdlib_prefixes = {sys.prefix, sys.exec_prefix}
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (basehttpserver, zlib):
        if mod is None:
            continue
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            stdlib_prefixes.add(dirname)
    sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with something in
        # stdlib_prefixes, but not directories from the hg sources.
        if os.path.abspath(libpath).startswith(sourceroot) or not any(
            libpath.startswith(p) for p in stdlib_prefixes
        ):
            continue
        for top, dirs, files in os.walk(libpath):
            for i, d in reversed(list(enumerate(dirs))):
                if (
                    not os.path.exists(os.path.join(top, d, '__init__.py'))
                    or top == libpath
                    and d in ('hgdemandimport', 'hgext', 'mercurial')
                ):
                    del dirs[i]
            for name in files:
                if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                    continue
                if name.startswith('__init__.py'):
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1 :]
                mod = dotted_name_of_path(rel_path)
                yield mod


stdlib_modules = set(list_stdlib_modules())


def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      localmods: set of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Returns:
      A list of absolute module names imported by the given source.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                found = fromlocal(n.name)
                if not found:
                    # this should import standard library
                    continue
                yield found[1]
        elif isinstance(node, ast.ImportFrom):
            found = fromlocal(node.module, node.level)
            if not found:
                # this should import standard library
                continue

            absname, dottedpath, hassubmod = found
            if not hassubmod:
                # "dottedpath" is not a package; must be imported
                yield dottedpath
                # examination of "node.names" should be redundant
                # e.g.: from mercurial.node import nullid, nullrev
                continue

            modnotfound = False
            prefix = absname + '.'
            for n in node.names:
                found = fromlocal(prefix + n.name)
                if not found:
                    # this should be a function or a property of "node.module"
                    modnotfound = True
                    continue
                yield found[1]
            if modnotfound and dottedpath != modulename:
                # "dottedpath" is a package, but imported because of non-module
                # lookup
                # specifically allow "from . import foo" from __init__.py
                yield dottedpath


def verify_import_convention(module, source, localmods):
    """Verify imports match our established coding convention.

    We have 2 conventions: legacy and modern. The modern convention is in
    effect when using absolute imports.

    The legacy convention only looks for mixed imports. The modern convention
    is much more thorough.
    """
    root = ast.parse(source)
    absolute = usingabsolute(root)

    if absolute:
        return verify_modern_convention(module, root, localmods)
    else:
        return verify_stdlib_on_own_line(root)


def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Verify a file conforms to the modern import convention rules.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other local imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed, unless that
      library is also a local module.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.
    """
    if not isinstance(module, str):
        module = module.decode('ascii')
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Whether a local/non-stdlib import has been performed.
    seenlocal = None
    # Whether a local/non-stdlib, non-symbol import has been seen.
    seennonsymbollocal = False
    # The last name to be imported (for sorting).
    lastname = None
    laststdlib = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):

        def msg(fmt, *args):
            return (fmt % args, node.lineno)

        if newscope:
            # Check for local imports in function
            for r in verify_modern_convention(
                module, node, localmods, node.col_offset + 4
            ):
                yield r
        elif isinstance(node, ast.Import):
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg(
                    'multiple imported names: %s',
                    ', '.join(n.name for n in node.names),
                )

            name = node.names[0].name
            asname = node.names[0].asname

            stdlib = name in stdlib_modules

            # Ignore sorting rules on imports inside blocks.
            if node.col_offset == root_col_offset:
                if lastname and name < lastname and laststdlib == stdlib:
                    yield msg(
                        'imports not lexically sorted: %s < %s', name, lastname
                    )

            lastname = name
            laststdlib = stdlib

            # stdlib imports should be before local imports.
            if stdlib and seenlocal and node.col_offset == root_col_offset:
                yield msg(
                    'stdlib import "%s" follows local import: %s',
                    name,
                    seenlocal,
                )

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            topname = name.split('.')[0]
            if topname == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg(
                    '%s module must be "as" aliased to %s',
                    name,
                    requirealias[name],
                )

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                fullname = '.'.join(module.split('.')[: -node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                topname = fullname.split('.')[0]
                if topname == topmodule:
                    yield msg('import should be relative: %s', fullname)

            # __future__ is special since it needs to come first and use
            # symbol import.
            if fullname != '__future__':
                if not fullname or (
                    fullname in stdlib_modules
                    # allow standard 'from typing import ...' style
                    and fullname.startswith('.')
                    and fullname not in localmods
                    and fullname + '.__init__' not in localmods
                ):
                    yield msg('relative import of stdlib module')
                else:
                    seenlocal = fullname

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            found = fromlocal(node.module, node.level)
            if found and found[2]:  # node.module is a package
                prefix = found[0] + '.'
                symbols = (
                    n.name for n in node.names if not fromlocal(prefix + n.name)
                )
            else:
                symbols = (n.name for n in node.names)
            symbols = [sym for sym in symbols if sym not in directsymbols]
            if node.module and node.col_offset == root_col_offset:
                if symbols and fullname not in allowsymbolimports:
                    yield msg(
                        'direct symbol import %s from %s',
                        ', '.join(symbols),
                        fullname,
                    )

                if symbols and seennonsymbollocal:
                    yield msg(
                        'symbol import follows non-symbol import: %s', fullname
                    )
            if not symbols and fullname not in stdlib_modules:
                seennonsymbollocal = True

            if not node.module:
                assert node.level

                # Only allow 1 group per level.
                if (
                    node.level in seenlevels
                    and node.col_offset == root_col_offset
                ):
                    yield msg(
                        'multiple "from %s import" statements', '.' * node.level
                    )

                # Higher-level groups come before lower-level groups.
                if any(node.level > l for l in seenlevels):
                    yield msg(
                        'higher-level import should come first: %s', fullname
                    )

                seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            lastentryname = None

            for n in node.names:
                if lastentryname and n.name < lastentryname:
                    yield msg(
                        'imports from %s not lexically sorted: %s < %s',
                        fullname,
                        n.name,
                        lastentryname,
                    )

                lastentryname = n.name

                if n.name in requirealias and n.asname != requirealias[n.name]:
                    yield msg(
                        '%s from %s must be "as" aliased to %s',
                        n.name,
                        fullname,
                        requirealias[n.name],
                    )


def verify_stdlib_on_own_line(root):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n   stdlib:    sys\\n   relative:  foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for node in ast.walk(root):
        if isinstance(node, ast.Import):
            from_stdlib = {False: [], True: []}
            for n in node.names:
                from_stdlib[n.name in stdlib_modules].append(n.name)
            if from_stdlib[True] and from_stdlib[False]:
                yield (
                    'mixed imports\n   stdlib:    %s\n   relative:  %s'
                    % (
                        ', '.join(sorted(from_stdlib[True])),
                        ', '.join(sorted(from_stdlib[False])),
                    ),
                    node.lineno,
                )


class CircularImport(Exception):
    pass


def checkmod(mod, imports):
    shortest = {}
    visit = [[mod]]
    while visit:
        path = visit.pop(0)
        for i in sorted(imports.get(path[-1], [])):
            if len(path) < shortest.get(i, 1000):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    continue
                visit.append(path + [i])


def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    lowest = min(cycle)
    idx = cycle.index(lowest)
    return cycle[idx:] + cycle[:idx] + [lowest]


def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = set()
    for mod in sorted(imports.keys()):
        try:
            checkmod(mod, imports)
        except CircularImport as e:
            cycle = e.args[0]
            cycles.add(" -> ".join(rotatecycle(cycle)))
    return cycles


def _cycle_sortkey(c):
    return len(c), c


def embedded(f, modname, src):
    """Extract embedded python code

    >>> def _forcestr(thing):
    ...     if not isinstance(thing, str):
    ...         return thing.decode('ascii')
    ...     return thing
    >>> def test(fn, lines):
    ...     for s, m, f, l in embedded(fn, b"example", lines):
    ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
    ...         print(repr(_forcestr(s)))
    >>> lines = [
    ...   'comment',
    ...   '  >>> from __future__ import print_function',
    ...   "  >>> ' multiline",
    ...   "  ... string'",
    ...   '  ',
    ...   'comment',
    ...   '  $ cat > foo.py <<EOF',
    ...   '  > from __future__ import print_function',
    ...   '  > EOF',
    ... ]
    >>> test(b"example.t", lines)
    example[2] doctest.py 1
    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
    example[8] foo.py 7
    'from __future__ import print_function\\n'
    """
    errors = []
    for name, starts, ends, code in testparseutil.pyembedded(f, src, errors):
        if not name:
            # use 'doctest.py', in order to make already existing
            # doctest above pass instantly
            name = 'doctest.py'
        # "starts" is "line number" (1-origin), but embedded() is
        # expected to return "line offset" (0-origin). Therefore, this
        # yields "starts - 1".
        if not isinstance(modname, str):
            modname = modname.decode('utf8')
        yield code, "%s[%d]" % (modname, starts), name, starts - 1


def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields:  script(string), modulename, filepath, linenumber

    For embedded scripts, the modulename and filepath will be different
    from the function arguments. linenumber is an offset relative to
    the input file.
    """
    py = False
    if not f.endswith('.t'):
        with open(f, 'rb') as src:
            yield src.read(), modname, f, 0
            py = True
    if py or f.endswith('.t'):
        # Strictly speaking we should sniff for the magic header that denotes
        # Python source file encoding. But in reality we don't use anything
        # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
        # simplicity is fine.
        with io.open(f, 'r', encoding='utf-8') as src:
            for script, modname, t, line in embedded(f, modname, src):
                yield script, modname.encode('utf8'), t, line


def main(argv):
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        print('Usage: %s {-|file [file] [file] ...}')
        return 1
    if argv[1] == '-':
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmodpaths = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        localmodpaths[modname] = source_path
    localmods = populateextmods(localmodpaths)
    for localmodname, source_path in sorted(localmodpaths.items()):
        if not isinstance(localmodname, bytes):
            # This is only safe because all hg's files are ascii
            localmodname = localmodname.encode('ascii')
        for src, modname, name, line in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(
                        src, modname, name, localmods, ignore_nested=True
                    )
                )
                for error, lineno in verify_import_convention(
                    modname, src, localmods
                ):
                    any_errors = True
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                print(
                    '%s:%d: SyntaxError: %s' % (source_path, e.lineno + line, e)
                )
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors != 0


if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))