Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/s4d'
Browse files Browse the repository at this point in the history
  • Loading branch information
EliezerIsrael committed Oct 20, 2022
2 parents 842a5a8 + dc3df12 commit a052e90
Show file tree
Hide file tree
Showing 16 changed files with 362 additions and 48 deletions.
19 changes: 14 additions & 5 deletions build/ci/production-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,21 @@ restore:
tarball: jmc-private_dump_21.09.22.tar
serviceAccount: database-backup-read
backup:
# Legacy compatibility section; remove once the deploy chart is sufficiently updated
enabled: true
prefix: ""
public: true
bucket: sefaria-contextus-backup
archiveBucket: sefaria-contextus-archive
serviceAccount: database-backup-write
mongo:
enabled: true
prefix: ""
public: true
bucket: sefaria-contextus-backup
archiveBucket: sefaria-contextus-archive
serviceAccount: database-backup-write
postgres:
enabled: false
version: 10.3
prefix: ""
bucket: sefaria-user-backup
serviceAccount: database-backup-write
web:
containerImage:
imageRegistry:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.backup.enabled true }}
{{- if eq .Values.backup.mongo.enabled true }}
apiVersion: v1
kind: ConfigMap
metadata:
Expand Down Expand Up @@ -69,7 +69,7 @@ data:
echo "building small private tar file"
tar cvzf "${DATADIR}/private_dump_small.tar.gz" -C "${DATADIR}" ./dump
{{- if .Values.backup.public }}
{{- if .Values.backup.mongo.public }}
echo "creating public dump"
until mongodump --host=mongo:27017 -d sefaria -v --collection texts --query '{"license": {"$not": {"$regex": "/^Copyright/i"}}}' -o "${DATADIR}/dump"
do
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- if eq .Values.backup.postgres.enabled true }}
# ConfigMap holding the script that dumps the Postgres `sefaria_auth` database
# into the shared volume. Rendered only when backup.postgres.enabled is true.
apiVersion: v1
kind: ConfigMap
metadata:
  name: create-pg-dumps-{{ .Values.deployEnv }}
  labels:
    deployEnv: "{{ .Values.deployEnv }}"
    {{- include "sefaria.labels" . | nindent 4 }}
data:
  create-dumps.sh: |-
    #!/usr/bin/env bash
    # Stop at the first failing command so a broken dump is never archived.
    set -eo pipefail

    DATADIR="/pgdumps/shared_volume"

    # Build the pg_dump argument list as an array instead of a string passed
    # to eval, so values containing spaces or shell metacharacters stay intact.
    dump_args=(
      --file "${DATADIR}/pg.dump"
      --host "${DATABASES_HOST}"
      --port "${DATABASES_PORT}"
      --username "${DATABASES_USER}"
    )

    if [[ -z "${DATABASES_PASSWORD:-}" ]]; then
      # No password configured: never block waiting on an interactive prompt.
      dump_args+=(--no-password)
    else
      # pg_dump's --password flag takes no value (it only forces a prompt);
      # the password itself is supplied through the PGPASSWORD variable.
      export PGPASSWORD="${DATABASES_PASSWORD}"
    fi

    dump_args+=(
      --verbose --format=c --no-owner
      --section=pre-data --section=data
      --no-privileges --no-tablespaces --no-unlogged-table-data
    )

    pg_dump "${dump_args[@]}" sefaria_auth

    # Archive the file pg_dump actually wrote (pg.dump, not ./dump).
    tar cvzf "${DATADIR}/postgres_dump.tar.gz" -C "${DATADIR}" ./pg.dump
{{- end }}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.backup.enabled }}
{{- if .Values.backup.mongo.enabled }}
apiVersion: v1
kind: ConfigMap
metadata:
Expand All @@ -10,7 +10,7 @@ data:
upload-dumps.sh: |-
#!/usr/bin/env bash
set -x
{{- if .Values.backup.serviceAccount }}
{{- if .Values.backup.mongo.serviceAccount }}
gloud auth activate
{{- else }}
gcloud auth activate-service-account --key-file ${GOOGLE_APPLICATION_CREDENTIALS}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{{- if .Values.backup.postgres.enabled }}
# ConfigMap holding the script that uploads the Postgres dump archive from the
# shared volume to cloud storage and posts a Slack notification.
# Rendered only when backup.postgres.enabled is set.
apiVersion: v1
kind: ConfigMap
metadata:
  name: upload-pg-dumps-{{ .Values.deployEnv }}
  labels:
    deployEnv: "{{ .Values.deployEnv }}"
    {{- include "sefaria.labels" . | nindent 4 }}
data:
  upload-dumps.sh: |-
    #!/usr/bin/env bash
    set -x
    {{- if .Values.backup.postgres.serviceAccount }}
    # The previous `gloud auth activate` was a misspelled binary and is not a
    # gcloud subcommand. NOTE(review): presumably the pod's service account
    # supplies credentials without explicit activation - confirm. Listing the
    # accounts records in the job log which identity is in use.
    gcloud auth list
    {{- else }}
    gcloud auth activate-service-account --key-file "${GOOGLE_APPLICATION_CREDENTIALS}"
    {{- end }}
    cd "/pgdumps/shared_volume"
    today="$(date +'%d.%m.%y')"
    last_month="$(date --date='last month' +'%d.%m.%y')"

    # Remove the dump carrying the date exactly one month back, if it exists.
    gsutil rm "gs://${BUCKET}/${PREFIX}postgres_dump_${last_month}.tar.gz"

    if [ -f "postgres_dump.tar.gz" ]; then
      echo "uploading private dump"
      gsutil cp postgres_dump.tar.gz "gs://${BUCKET}/${PREFIX}postgres_dump_${today}.tar.gz"
    else
      echo "Private dump missing"
    fi

    # The notification is sent on both branches above, as in the original script.
    curl -X POST --data-urlencode 'payload={"channel": "#engineering", "username": "Data Archiver", "text": "The {{ .Values.deployEnv }} Postgres Database was routinely dumped to cloud storage: '"$(date)"'", "icon_emoji": ":cloud:"}' "${SLACK_URL}"
{{- end }}
12 changes: 6 additions & 6 deletions helm-chart/sefaria-project/templates/cronjob/mongo-backup.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.backup.enabled true }}
{{- if eq .Values.backup.mongo.enabled true }}
---
apiVersion: batch/v1beta1
kind: CronJob
Expand All @@ -14,7 +14,7 @@ spec:
backoffLimit: 1
template:
spec:
serviceAccount: {{ .Values.backup.serviceAccount }}
serviceAccount: {{ .Values.backup.mongo.serviceAccount }}
affinity:
podAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
Expand All @@ -32,7 +32,7 @@ spec:
effect: "NoSchedule"
initContainers:
- name: mongo-dumper
image: mongo:4.4
image: mongo:{{ .Values.backup.mongo.version }}
envFrom:
- secretRef:
name: {{ .Values.secrets.localSettings.ref }}
Expand Down Expand Up @@ -66,11 +66,11 @@ spec:
readOnly: true
env:
- name: PREFIX
value: {{ .Values.backup.prefix }}
value: {{ .Values.backup.mongo.prefix }}
- name: BUCKET
value: {{ .Values.backup.bucket }}
value: {{ .Values.backup.mongo.bucket }}
- name: ARCHIVE_BUCKET
value: {{ .Values.backup.archiveBucket }}
value: {{ .Values.backup.mongo.archiveBucket }}
- name: SLACK_URL
valueFrom:
secretKeyRef:
Expand Down
97 changes: 97 additions & 0 deletions helm-chart/sefaria-project/templates/cronjob/postgres-packup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
{{- if eq .Values.backup.postgres.enabled true }}
---
# CronJob that dumps the Postgres database in an init container and then
# uploads the resulting archive from a second container. Both containers
# share an emptyDir volume mounted at /pgdumps/shared_volume.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: {{ .Values.deployEnv }}-postgresbackup
  labels:
    {{- include "sefaria.labels" . | nindent 4 }}
spec:
  concurrencyPolicy: Replace
  # Weekly: 00:00 every Sunday.
  schedule: "0 0 * * 0"
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccount: {{ .Values.backup.postgres.serviceAccount }}
          # Must be co-located with a pod labelled app=postgres.
          affinity:
            podAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                      - key: app
                        operator: In
                        values:
                          - postgres
                  topologyKey: kubernetes.io/hostname
          tolerations:
            - key: schedule-on-database-vm
              operator: "Equal"
              value: "true"
              effect: "NoSchedule"
          initContainers:
            # Runs create-dumps.sh to write the dump archive to the shared volume.
            - name: postgres-dumper
              image: "postgres:{{ .Values.backup.postgres.version }}"
              envFrom:
                - secretRef:
                    name: {{ .Values.secrets.localSettings.ref }}
                - configMapRef:
                    name: local-settings-{{ .Values.deployEnv }}
                - secretRef:
                    name: local-settings-secrets-{{ .Values.deployEnv }}
                    optional: true
              volumeMounts:
                - name: shared-volume
                  mountPath: /pgdumps/shared_volume
                - name: create-dumps-script
                  mountPath: /scripts/create-dumps.sh
                  subPath: create-dumps.sh
                  readOnly: true
              command: ["bash"]
              args: ["-c", "/scripts/create-dumps.sh"]
              resources:
                limits:
                  memory: "500Mi"
          containers:
            # Runs upload-dumps.sh to copy the archive to the backup bucket.
            - name: pgdump-uploader
              image: google/cloud-sdk
              volumeMounts:
                - name: shared-volume
                  mountPath: /pgdumps/shared_volume
                - name: upload-dumps-script
                  mountPath: /scripts/upload-dumps.sh
                  subPath: upload-dumps.sh
                  readOnly: true
              env:
                # Values are quoted so an empty prefix renders as "" rather
                # than a bare `value:` (null).
                - name: PREFIX
                  value: {{ .Values.backup.postgres.prefix | quote }}
                - name: BUCKET
                  value: {{ .Values.backup.postgres.bucket | quote }}
                # archiveBucket is not defined under backup.postgres in the
                # values shown in this change, so fall back to an empty string.
                - name: ARCHIVE_BUCKET
                  value: {{ .Values.backup.postgres.archiveBucket | default "" | quote }}
                - name: SLACK_URL
                  valueFrom:
                    secretKeyRef:
                      name: {{ template "sefaria.secrets.slackWebhook" . }}
                      key: slack-webhook
              command: ["bash"]
              args: ["-c", "/scripts/upload-dumps.sh"]
              resources:
                limits:
                  memory: "500Mi"
          restartPolicy: OnFailure
          volumes:
            - name: create-dumps-script
              configMap:
                name: create-pg-dumps-{{ .Values.deployEnv }}
                defaultMode: 0755
            - name: upload-dumps-script
              configMap:
                name: upload-pg-dumps-{{ .Values.deployEnv }}
                defaultMode: 0755
            - name: shared-volume
              emptyDir: {}
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
{{- end }}
20 changes: 14 additions & 6 deletions helm-chart/sefaria-project/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,20 @@ restore:

# config to backup environment DB
backup:
enabled: false
prefix: ""
public: true
bucket: sefaria-mongo-backup
archiveBucket: sefaria-mongo-archive
serviceAccount: database-backup-write
mongo:
enabled: false
prefix: ""
public: true
bucket: sefaria-mongo-backup
archiveBucket: sefaria-mongo-archive
serviceAccount: database-backup-write
version: 4.4
postgres:
enabled: false
version: 10.3
prefix: ""
bucket: sefaria-user-backup
serviceAccount: database-backup-write

web:
# key-pair values to load into web pod environment. Takes precedence over global localsettings
Expand Down
32 changes: 23 additions & 9 deletions sefaria/model/lexicon.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ class LexiconEntry(abst.AbstractMongoRecord):
"catane_number",
"rid",
"strong_numbers",
"GK",
"TWOT",
'peculiar',
'all_cited',
'ordinal',
Expand Down Expand Up @@ -270,11 +272,11 @@ def get_sense(self, sense):

class BDBEntry(DictionaryEntry):
required_attrs = DictionaryEntry.required_attrs + ["content", "rid"]
optional_attrs = ['strong_numbers', 'next_hw', 'prev_hw', 'peculiar', 'all_cited', 'ordinal', 'brackets', 'headword_suffix', 'alt_headwords', 'root', 'occurrences', 'quotes']
optional_attrs = ['strong_numbers', 'next_hw', 'prev_hw', 'peculiar', 'all_cited', 'ordinal', 'brackets', 'headword_suffix', 'alt_headwords', 'root', 'occurrences', 'quotes', 'GK', 'TWOT']

def headword_string(self):
hw = f'<span dir="rtl">{re.sub("[⁰¹²³⁴⁵⁶⁷⁸⁹]*", "", self.headword)}</span>'
if hasattr(self, 'occurrences'):
if hasattr(self, 'occurrences') and not hasattr(self, 'headword_suffix'):
hw += f'</big><sub>{self.occurrences}</sub><big>' #the sub shouldn't be in big
alts = []
if hasattr(self, 'alt_headwords'):
Expand All @@ -286,16 +288,18 @@ def headword_string(self):
if getattr(self, 'brackets', '') == 'all':
if hasattr(self, 'headword_suffix'):
hw = f'[{hw}{self.headword_suffix}]' #if there's a space, it'll be part of headword_suffix
if hasattr(self, 'occurrences'):
hw += f'</big><sub>{self.occurrences}</sub><big>'
else:
hw = ", ".join([hw] + alts)
hw = f'[{", ".join([hw] + alts)}]'
else:
if hasattr(self, 'brackets') and self.brackets == 'first_word':
hw = f'[{hw}]'
if hasattr(self, 'alt_headwords'):
hw = ", ".join([hw] + alts)
hw = f'<big>{hw}</big>'
if hasattr(self, 'root'):
hw = f'<big>{hw}</big>'
hw = re.sub('(</?big>)', r'\1\1', hw)
if hasattr(self, 'ordinal'):
hw = f'{self.ordinal} {hw}'
if hasattr(self, 'all_cited'):
Expand All @@ -311,12 +315,22 @@ def get_alt_headwords(self):

def get_sense(self, sense):
string = ''
if 'note' in sense:
string = '<em>Note.</em>'
if 'pre_num' in sense:
string += f"{sense['pre_num']} "
if 'all_cited' in sense:
string += '†'
if 'form' in sense:
string += f'<strong>{sense["form"]}</strong> '
if 'num' in sense:
string += f'<strong>{sense["num"]}</strong> '
if 'note' in sense:
string += f' {sense["num"]}'
else:
string += f'<strong>{sense["form"]}</strong>'
elif 'num' in sense:
string += f'<strong>{sense["num"]}</strong>'
if 'occurrences' in sense:
string += f'<sub>{sense["occurrences"]}</sub>'
string += ' '
if 'definition' in sense:
return string + sense['definition']
else:
Expand All @@ -335,12 +349,12 @@ def as_strings(self, with_headword=True):
for sense in self.content['senses']:
sense = self.get_sense(sense)
if type(sense) == list:
strings += sense
strings.append(' '.join(sense))
else:
strings.append(sense)
if with_headword:
strings[0] = self.headword_string() + ' ' + strings[0]
return strings
return ['<br>'.join(strings)]


class LexiconEntrySubClassMapping(object):
Expand Down
6 changes: 3 additions & 3 deletions sefaria/model/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,7 +1251,7 @@ def has_manually_wrapped_refs(self):
class Version(AbstractTextRecord, abst.AbstractMongoRecord, AbstractSchemaContent):
"""
A version of a text.
NOTE: AbstractTextRecord is inherited before AbastractMongoRecord in order to overwrite ALLOWED_TAGS
NOTE: AbstractTextRecord is inherited before AbstractMongoRecord in order to overwrite ALLOWED_TAGS
Relates to a complete single record from the texts collection.
A new version is created with a dict of correlating information inside. Two example fields are below:
Expand Down Expand Up @@ -1398,7 +1398,7 @@ def is_copyrighted(self):

def walk_thru_contents(self, action, item=None, tref=None, heTref=None, schema=None, addressTypes=None, terms_dict=None):
"""
Walk through content of version and run `action` for each segment. Only required parameter to call is `action`
Walk through the contents of a version and run `action` for each segment. Only required parameter to call is `action`
:param func action: (segment_str, tref, he_tref, version) => None
action() is a callback function that can have any behavior you would like. It should return None.
Expand Down Expand Up @@ -1593,7 +1593,7 @@ def __call__(cls, *args, **kwargs):

class TextChunk(AbstractTextRecord, metaclass=TextFamilyDelegator):
"""
A chunk of text corresponding to the provided :class:`Ref`, language, and optionall version name.
A chunk of text corresponding to the provided :class:`Ref`, language, and optional version name.
If it is possible to get a more complete text by merging multiple versions, a merged result will be returned.
:param oref: :class:`Ref`
Expand Down
Loading

0 comments on commit a052e90

Please sign in to comment.