From d7f2544712a59d23b60ea92913773164d06f4164 Mon Sep 17 00:00:00 2001 From: auxiliarypower <134689569+auxiliarypower@users.noreply.github.com> Date: Tue, 5 Dec 2023 19:03:12 +0800 Subject: [PATCH 01/10] refactor: rename files for clarity --- update-script => manager.py | 0 geolocs.json => mirrors.geojson | 0 check-lastupdate => stats.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename update-script => manager.py (100%) mode change 100755 => 100644 rename geolocs.json => mirrors.geojson (100%) rename check-lastupdate => stats.py (100%) mode change 100755 => 100644 diff --git a/update-script b/manager.py old mode 100755 new mode 100644 similarity index 100% rename from update-script rename to manager.py diff --git a/geolocs.json b/mirrors.geojson similarity index 100% rename from geolocs.json rename to mirrors.geojson diff --git a/check-lastupdate b/stats.py old mode 100755 new mode 100644 similarity index 100% rename from check-lastupdate rename to stats.py From 2165dccdc26dc105b2fca31c255e00643f53b7bb Mon Sep 17 00:00:00 2001 From: auxiliarypower <134689569+auxiliarypower@users.noreply.github.com> Date: Tue, 5 Dec 2023 19:13:53 +0800 Subject: [PATCH 02/10] style: format with ruff and prettier - Format the files using the default settings of ruff and prettier - Validate that yaml file comply with the yamllint rules --- manager.py | 209 +++--- map.html | 117 +-- mirrors.geojson | 1878 ++++++++++++++++++++++++++++++++++++++++++++++- mirrors.yaml | 1411 +++++++++++++++++------------------ stats.py | 112 +-- 5 files changed, 2831 insertions(+), 896 deletions(-) diff --git a/manager.py b/manager.py index 62f2730..10afde1 100644 --- a/manager.py +++ b/manager.py @@ -1,20 +1,19 @@ #!/usr/bin/python3 -import sys -import time import argparse +import json +import logging import pprint -from urllib.parse import urlparse, urlunparse, quote import socket -from ipaddress import ip_address, IPv4Address, IPv6Address -from http.client import HTTPConnection, 
HTTPSConnection +import sys +import time from collections import OrderedDict -import logging -import json +from http.client import HTTPConnection, HTTPSConnection +from ipaddress import IPv4Address, IPv6Address, ip_address +from urllib.parse import quote, urlparse, urlunparse import yaml # in python-yaml package - SOURCE_YAML = "mirrors.yaml" OUTPUT_README = "README.md" OUTPUT_MIRRORLIST = "archlinuxcn-mirrorlist" @@ -49,49 +48,57 @@ ``` """ + ## ordered_load/dump_yaml from https://stackoverflow.com/a/21912744 def ordered_load_yaml(stream, Loader=yaml.Loader, object_pairs_hook=OrderedDict): class OrderedLoader(Loader): pass + def construct_mapping(loader, node): loader.flatten_mapping(node) return object_pairs_hook(loader.construct_pairs(node)) + OrderedLoader.add_constructor( - yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, - construct_mapping) + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping + ) return yaml.load(stream, OrderedLoader) + def ordered_dump_yaml(data, stream=None, Dumper=yaml.Dumper, **kwds): class OrderedDumper(Dumper): pass + def _dict_representer(dumper, data): return dumper.represent_mapping( - yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, - data.items()) + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items() + ) + OrderedDumper.add_representer(OrderedDict, _dict_representer) return yaml.dump(data, stream, OrderedDumper, **kwds) + def mirror_score(m): - if m['provider'] == 'CDN': + if m["provider"] == "CDN": return 1000 try: - protocols = m['protocols'] + protocols = m["protocols"] except KeyError: return 0 score = 0 - if 'https' in protocols: + if "https" in protocols: score += 100 - if 'ipv6' in protocols: + if "ipv6" in protocols: score += 100 - if 'http' in protocols and 'https' not in protocols: + if "http" in protocols and "https" not in protocols: score += 10 - if 'ipv4' in protocols: + if "ipv4" in protocols: score += 10 return score + def mirror_title(item): title = f'{item["provider"]}' if "location" in 
item: @@ -108,47 +115,56 @@ def mirror_comments(item): if "comment" in item: comments.append(f"## {item['comment']}") if comments: - return '\n' + '\n'.join(comments) + return "\n" + "\n".join(comments) else: - return '' + return "" def readme_item(item): return README_ITEM_TEMPLATE.format( - title=mirror_title(item), comments=mirror_comments(item), **item) + title=mirror_title(item), comments=mirror_comments(item), **item + ) def gen_readme(mirrors): - with open(OUTPUT_README, 'w') as output: + with open(OUTPUT_README, "w") as output: readme_items = [ - readme_item(item) for item in mirrors - if {'http', 'https'} & set(item['protocols']) + readme_item(item) + for item in mirrors + if {"http", "https"} & set(item["protocols"]) ] - print(README_TEMPLATE.format('\n'.join(readme_items)), file=output) + print(README_TEMPLATE.format("\n".join(readme_items)), file=output) def mirrorlist_item(item): - return MIRRORLIST_ITEM_TEMPLATE.format( - title=mirror_title(item), **item) + return MIRRORLIST_ITEM_TEMPLATE.format(title=mirror_title(item), **item) def gen_mirrorlist(mirrors): - with open(OUTPUT_MIRRORLIST, 'w') as output: - print(f"""\ + with open(OUTPUT_MIRRORLIST, "w") as output: + print( + f"""\ ## ## Arch Linux CN community repository mirrorlist ## Generated on {time.strftime('%Y-%m-%d')} ## -""", file=output) +""", + file=output, + ) - print("\n".join( - mirrorlist_item(item) for item in mirrors - if {'http', 'https'} & set(item['protocols']) - ), file=output, end='') + print( + "\n".join( + mirrorlist_item(item) + for item in mirrors + if {"http", "https"} & set(item["protocols"]) + ), + file=output, + end="", + ) def sub_readme(args): - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = ordered_load_yaml(source) # mirrors.sort(key=lambda m: -mirror_score(m)) @@ -158,7 +174,7 @@ def sub_readme(args): def sub_mirrorlist(args): - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = 
ordered_load_yaml(source) # mirrors.sort(key=lambda m: -mirror_score(m)) @@ -168,7 +184,7 @@ def sub_mirrorlist(args): def sub_list(args): - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = ordered_load_yaml(source) pprint.pprint(mirrors) @@ -180,9 +196,13 @@ def sub_list(args): def try_connect(domain, url, connection): try: http = connection(domain, timeout=5) - http.request('GET', url.path, headers={ - 'User-Agent': 'curl/8.0.1', - }) + http.request( + "GET", + url.path, + headers={ + "User-Agent": "curl/8.0.1", + }, + ) res = http.getresponse() if res.status == 200: return True @@ -191,20 +211,23 @@ def try_connect(domain, url, connection): def try_protocols(mirror): - url = urlparse(mirror['url']) + url = urlparse(mirror["url"]) domain = url.hostname protocols = [] - print('Accessing "{provider}" at "{domain}": ... '.format( - domain=domain, **mirror), end='', flush=True) + print( + 'Accessing "{provider}" at "{domain}": ... '.format(domain=domain, **mirror), + end="", + flush=True, + ) try: - for (family, _, _, _, sockaddr) in socket.getaddrinfo(domain, 80): + for family, _, _, _, sockaddr in socket.getaddrinfo(domain, 80): ip = sockaddr[0] ipa = ip_address(ip) if ipa.is_global: - if type(ipa) is IPv4Address and 'ipv4' not in protocols: + if type(ipa) is IPv4Address and "ipv4" not in protocols: protocols.append("ipv4") - if type(ipa) is IPv6Address and 'ipv6' not in protocols: + if type(ipa) is IPv6Address and "ipv6" not in protocols: protocols.append("ipv6") protocols.sort() except socket.gaierror: @@ -212,19 +235,19 @@ def try_protocols(mirror): else: if try_connect(domain, url, HTTPConnection): protocols.append("http") - url = url._replace(scheme='http') + url = url._replace(scheme="http") if try_connect(domain, url, HTTPSConnection): protocols.append("https") - url = url._replace(scheme='https') + url = url._replace(scheme="https") print(", ".join(protocols)) - mirror['protocols'] = protocols - mirror['url'] = 
urlunparse(url) + mirror["protocols"] = protocols + mirror["url"] = urlunparse(url) def sub_protocols(args): mirrors = [] - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = ordered_load_yaml(source) except yaml.YAMLError as e: @@ -233,8 +256,12 @@ def sub_protocols(args): for m in mirrors: try_protocols(m) with open(SOURCE_YAML, "w") as output: - print(ordered_dump_yaml(mirrors, encoding=None, allow_unicode=True, - default_flow_style=False), file=output) + print( + ordered_dump_yaml( + mirrors, encoding=None, allow_unicode=True, default_flow_style=False + ), + file=output, + ) def sub_all(args): @@ -245,21 +272,22 @@ def sub_all(args): def geoencoding(session, loc): res = session.get( - 'https://nominatim.openstreetmap.org/search?q=%s&format=jsonv2' % quote(loc), - headers = { - 'User-Agent': 'archlinuxcn/mirrorlist-repo updater/0.1', + "https://nominatim.openstreetmap.org/search?q=%s&format=jsonv2" % quote(loc), + headers={ + "User-Agent": "archlinuxcn/mirrorlist-repo updater/0.1", }, ) geo = res.json()[0] - logging.info('%s is at (%s, %s)', loc, geo['lat'], geo['lon']) - return '%(lat)s, %(lon)s' % geo + logging.info("%s is at (%s, %s)", loc, geo["lat"], geo["lon"]) + return "%(lat)s, %(lon)s" % geo def sub_geo(args): mirrors = [] import requests + session = requests.Session() - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = ordered_load_yaml(source) except yaml.YAMLError as e: @@ -267,13 +295,13 @@ def sub_geo(args): sys.exit(1) places = {} for m in mirrors: - locs = m.get('geolocs') - coords = m.get('geocoords') + locs = m.get("geolocs") + coords = m.get("geocoords") if locs and coords and len(locs) == len(coords): places.update(zip(locs, coords)) for m in mirrors: - locs = m.get('geolocs') - coords = m.get('geocoords') + locs = m.get("geolocs") + coords = m.get("geocoords") if not locs: continue if locs and coords and len(locs) == len(coords): @@ -284,10 +312,14 @@ 
def sub_geo(args): if not coord: coord = places[loc] = geoencoding(session, loc) coords.append(coord) - m['geocoords'] = coords + m["geocoords"] = coords with open(SOURCE_YAML, "w") as output: - print(ordered_dump_yaml(mirrors, encoding=None, allow_unicode=True, - default_flow_style=False), file=output) + print( + ordered_dump_yaml( + mirrors, encoding=None, allow_unicode=True, default_flow_style=False + ), + file=output, + ) def sub_geojson(args): @@ -296,7 +328,7 @@ def sub_geojson(args): "type": "FeatureCollection", "features": features, } - with open(SOURCE_YAML, 'r') as source: + with open(SOURCE_YAML, "r") as source: try: mirrors = ordered_load_yaml(source) # mirrors.sort(key=lambda m: -mirror_score(m)) @@ -304,65 +336,70 @@ def sub_geojson(args): sys.exit(repr(e)) for m in mirrors: - coords = m.get('geocoords') + coords = m.get("geocoords") if not coords: continue - locs = m['geolocs'] + locs = m["geolocs"] for loc, coord in zip(locs, coords): - lat, lon = coord.split(', ') + lat, lon = coord.split(", ") feature = { "type": "Feature", "properties": { - "mirror": m['provider'], - "url": m['url'], + "mirror": m["provider"], + "url": m["url"], "name": loc, }, "geometry": { "type": "Point", "coordinates": [float(lon), float(lat)], - } + }, } features.append(feature) - with open(OUTPUT_GEOJSON, 'w') as f: + with open(OUTPUT_GEOJSON, "w") as f: json.dump(geojson, f, ensure_ascii=False) def main(): parser = argparse.ArgumentParser( - description='update mirrors protocols and generate mirrorlist and README.md') + description="update mirrors protocols and generate mirrorlist and README.md" + ) sub = parser.add_subparsers() - listparser = sub.add_parser('list', help=f'list mirrors in {SOURCE_YAML}') + listparser = sub.add_parser("list", help=f"list mirrors in {SOURCE_YAML}") listparser.set_defaults(func=sub_list) protparser = sub.add_parser( - 'protocols', help='try access to URLs of the mirrors and update the protocols') + "protocols", help="try access to URLs of the 
mirrors and update the protocols" + ) protparser.set_defaults(func=sub_protocols) readmeparser = sub.add_parser( - 'readme', help=f'generate {OUTPUT_README} from {SOURCE_YAML}') + "readme", help=f"generate {OUTPUT_README} from {SOURCE_YAML}" + ) readmeparser.set_defaults(func=sub_readme) mirrorlistparser = sub.add_parser( - 'mirrorlist', help=f'generate {OUTPUT_MIRRORLIST} from {SOURCE_YAML}') + "mirrorlist", help=f"generate {OUTPUT_MIRRORLIST} from {SOURCE_YAML}" + ) mirrorlistparser.set_defaults(func=sub_mirrorlist) - allparser = sub.add_parser('all', help='do all 3 above') + allparser = sub.add_parser("all", help="do all 3 above") allparser.set_defaults(func=sub_all) - geoparser = sub.add_parser( - 'geo', help=f'update geo coordinates for {SOURCE_YAML}') + geoparser = sub.add_parser("geo", help=f"update geo coordinates for {SOURCE_YAML}") geoparser.set_defaults(func=sub_geo) geojsonparser = sub.add_parser( - 'geojson', help=f'generate {OUTPUT_GEOJSON} for {SOURCE_YAML}') + "geojson", help=f"generate {OUTPUT_GEOJSON} for {SOURCE_YAML}" + ) geojsonparser.set_defaults(func=sub_geojson) args = parser.parse_args() - if 'func' not in args: + if "func" not in args: parser.print_help() sys.exit(1) args.func(args) -if __name__ == '__main__': +if __name__ == "__main__": try: import nicelogger - nicelogger.enable_pretty_logging('INFO') + + nicelogger.enable_pretty_logging("INFO") except ImportError: pass main() diff --git a/map.html b/map.html index 58ab398..5272d8a 100644 --- a/map.html +++ b/map.html @@ -1,16 +1,26 @@ - + - + [archlinuxcn] mirror locations - + - - + + + + + + +
+ + + + + + + + + + + + +
ProviderRegionProtocolslastupdatex86_64any
+
+ + + From 49b53716c5a8f220c6e6aa80e958a7c98b6bd202 Mon Sep 17 00:00:00 2001 From: auxiliarypower <134689569+auxiliarypower@users.noreply.github.com> Date: Wed, 6 Dec 2023 19:29:36 +0800 Subject: [PATCH 09/10] feat(map.html): enhance features - Follow the prettier, stylelint-config-standard and stylelint-config-recess-order rules - Change the page title; Add favicon.ico and page description - Upgrade the dependencies to the latest versions - Change the legend and popup style - Set the zoom level of the map; Avoid showing the coverage area on mouse hover - Remove scale bar and sorting mirror sites function - Improve the color of the mirror site markers --- map.html | 85 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/map.html b/map.html index 5272d8a..f703121 100644 --- a/map.html +++ b/map.html @@ -3,28 +3,38 @@ - [archlinuxcn] mirror locations - + + Arch Linux CN Community Repository Mirrors Map + + - + +
From e4a041f6fe24bcc658cabc1afda36132b76202a8 Mon Sep 17 00:00:00 2001 From: auxiliarypower <134689569+auxiliarypower@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:52:33 +0800 Subject: [PATCH 10/10] feat(application.md): translate to English - Validate that markdown file comply with the markdownlint-cli2 rules --- application.md | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/application.md b/application.md index 920e167..873febf 100644 --- a/application.md +++ b/application.md @@ -1,16 +1,21 @@ -我们欢迎有能力的组织和个人镜像我们的软件仓库。以下是推荐的申请流程: +# How to Apply to Mirror the Arch Linux CN Repository -1. 在本仓库开一个 pr,说明相关情况,并在 mirrors.yaml 文件中添加预计建立镜像的相关信息。同时在 pr 中提供一个邮件地址。 -2. 等待含有 rsync 用户名和密码的邮件。 -3. 等镜像初始化完成之后,pr 将被合并。 +We welcome capable organizations and individuals to mirror our software repository. This improves the performance and reliability of our software distribution and provides a better service for users in your region. To apply, please follow these steps: -推荐的同步命令: +1. Clone this repository and edit [mirrors.yaml](mirrors.yaml) with your mirror information. Then run `python manager.py all`: the protocols and coordinates fields will be populated automatically, and the README.md, archlinuxcn-mirrorlist, and mirrors.geojson files will be generated. -```sh -RSYNC_PASSWORD=<你的rsync密码> rsync -rtlivH --delete-after --delay-updates --safe-links --max-delete=1000 --contimeout=60 <你的rsync用户名>@sync.repo.archlinuxcn.org::repo . -``` +2. Create a pull request with your changes and describe your situation. Please provide a contact email address. -关于同步频率:我们的打包机器人 lilac 每天 (Asia/Shanghai, UTC+8) 4、12、20 点多会开始打包,因此建议同步频率6、7小时一次,尽量避开 lilac 打包的时间段。 +3. Wait for our email with the rsync username and password. 
Use the following command to synchronize your mirror with our main server: -你也可以发送邮件到 repo 位于 archlinuxcn.org 来申请。 + ```bash + RSYNC_PASSWORD=<your-rsync-password> rsync --recursive --times --links --hard-links --safe-links --max-delete=1000 --delete-after --delay-updates --itemize-changes --verbose --contimeout=60 <your-rsync-username>@sync.repo.archlinuxcn.org::repo . + ``` +4. Once your mirror has been initialized, we will merge your pull request and include your mirror in our mirrorlist. + +5. We recommend synchronizing every 6 or 7 hours and avoiding the times when our automatic packaging system, [lilac](https://github.com/archlinuxcn/lilac), starts building packages: shortly after 04:00, 12:00 and 20:00 (Asia/Shanghai, UTC+8) every day. This helps prevent conflicts and errors during synchronization. + +Alternatively, you can apply by sending an email to repo at archlinuxcn.org. We will reply as soon as possible. + +Thank you for your support and contribution!