Estacion-R · pablotis · May 8, 2026 · May 4, 2026 · May 4, 2026 · May 5, 2026
diff --git a/.github/workflows/tests-e2e.yml b/.github/workflows/tests-e2e.yml
@@ -0,0 +1,85 @@
+name: Tests E2E (shinytest2)
+
+### Heavy E2E tests using shinytest2 + Chromote. Because of their cost
+### (headless Chrome + full app boot + heavy datasets) they do NOT run
+### on every push. Triggers:
+###
+###   - workflow_dispatch: manual, on demand, from the Actions UI.
+###   - schedule: weekly (Sunday 06:00 UTC) as a sanity check.
+###
+### The tests-unit.yml workflow remains the mandatory gate for every
+### PR. This one complements it; it does not replace it.
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 6 * * 0"  # Sundays at 06:00 UTC
+
+jobs:
+  e2e-tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Setup Chrome
+        uses: browser-actions/setup-chrome@v1
+
+      - name: Setup R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: '4.5.3'
+          use-public-rspm: true
+
+      - name: Cache R packages
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-r-e2e-${{ hashFiles('ETL/00-libraries.R', '.github/workflows/tests-e2e.yml') }}
+          restore-keys: ${{ runner.os }}-r-e2e-
+
+      ### Unlike tests-unit.yml, E2E needs ALL app packages (highcharter, gt,
+      ### waiter, bsicons, ...): shinytest2 boots the whole app. The list
+      ### lives in this file, so the cache key above also hashes this file.
+      - name: Instalar paquetes R (full stack app + shinytest2)
+        run: |
+          install.packages(c(
+            "testthat", "shinytest2", "chromote",
+            "tibble", "dplyr", "tidyr", "purrr", "readr", "stringr",
+            "glue", "arrow", "withr", "rlang", "assertthat",
+            "shiny", "bslib", "highcharter", "gt", "waiter", "bsicons",
+            "shinychat", "eph"
+          ))
+        shell: Rscript {0}
+
+      ### Precomputed datasets: the E2E tests need data_output/ with the
+      ### real parquets/CSVs. They are gitignored (~80 MB), so they must
+      ### be regenerated. Temporary workaround: download a minimal fixture
+      ### or run the full ETL (slow).
+      ###
+      ### TODO: once the auto-update pipeline from issue #pipeline runs
+      ### on GH Actions, we can download the latest build.
+      ### For now the tests are skipped when the data is not on the runner
+      ### (the tests use a defensive skip() in the new_app helper).
+      - name: Verificar datasets disponibles
+        run: |
+          ls -lh data_output/ || echo "No data_output/ (tests E2E saltarán)"
+
+      - name: Correr tests E2E
+        run: |
+          Rscript tests/testthat.R
+        env:
+          RUN_E2E: "true"
+          NOT_CRAN: "true"
+          R_KEEP_PKG_SOURCE: "yes"
+
+      ### If shinytest2 produced snapshots/diffs, upload them as an artifact
+      ### for post-mortem inspection when the run fails.
+      - name: Upload snapshots si fallaron
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: shinytest2-snapshots
+          path: tests/testthat/_snaps/
+          if-no-files-found: ignore
diff --git a/.github/workflows/tests-unit.yml b/.github/workflows/tests-unit.yml
@@ -0,0 +1,52 @@
+name: Tests unitarios
+
+on:
+  push:
+    branches: [master, staging]
+  pull_request:
+    branches: [master, staging]
+
+jobs:
+  unit-tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+
+      - name: Setup R
+        uses: r-lib/actions/setup-r@v2
+        with:
+          r-version: '4.5.3'
+          use-public-rspm: true
+
+      - name: Cache R packages
+        uses: actions/cache@v4
+        with:
+          path: ${{ env.R_LIBS_USER }}
+          key: ${{ runner.os }}-r-tests-${{ hashFiles('ETL/00-libraries.R', '.github/workflows/tests-unit.yml') }}
+          restore-keys: ${{ runner.os }}-r-tests-
+
+      ### Minimal packages for the Sprint test-1 tests. tests/testthat.R
+      ### does NOT source 00-libraries.R, which avoids highcharter, gt,
+      ### waiter, bsicons, brand.yml (UI-only); pure-function tests do not
+      ### need them. The list lives here, so the cache key hashes this file.
+      ###
+      ### shiny + bslib are required because some tests source
+      ### R/mod_calidad_panel.R (it defines functions that use NS,
+      ### nav_panel, etc. internally; sourcing only defines them, it
+      ### does not run them).
+      - name: Instalar paquetes R
+        run: |
+          install.packages(c(
+            "testthat", "tibble", "dplyr", "tidyr", "purrr", "readr",
+            "stringr", "glue", "arrow", "withr", "rlang", "assertthat",
+            "shiny", "bslib", "eph"
+          ))
+        shell: Rscript {0}
+
+      - name: Correr tests
+        run: |
+          Rscript tests/testthat.R
+        env:
+          R_KEEP_PKG_SOURCE: "yes"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,60 @@ versionado [SemVer](https://semver.org/lang/es/) adaptado a app web:
 
 ---
 
+## [Unreleased]
+
+### Added
+
+- Sprint test-1 batch 3: tests para `arma_matriz_transicion`,
+  `build_tasas_historico`, `regenerar_calidad_panel`, `formato_delta`,
+  `sankey_label_legible`, `sankey_nodes_orden`. Suite de testthat pasa
+  de 79 a 149 tests en verde.
+- Sprint test-2: tests de server logic con `shiny::testServer()`.
+  Cubre `mod_calidad_panel_server` (switch trimestral/anual del dataset,
+  filtro por años y dúos, outputs KPI) y `armo_base_panel(window="anual")`
+  con parquet fixture sintético (filter pushdown, drop de cols
+  anio_0/trim_0, errores). Suite pasa a 185 tests en verde.
+  `mod_analisis_*_server` se difieren a Sprint test-3 (E2E con
+  shinytest2 es más rentable que pelear el mock de globales).
+- Sprint test-3 lite: 3 tests E2E con `shinytest2` + Chromote para
+  smoke (boot + input `tipo_duo` registrado), toggle tipo_duo
+  (estado trim ↔ anual), y módulo Calidad (KPI render tras navegar
+  al panel). Suite total: **192 tests** (185 unit + 7 E2E con
+  `RUN_E2E=true`). Workflow CI separado `tests-e2e.yml` con
+  `workflow_dispatch` + cron semanal (no en cada PR para no
+  inflar el ciclo).
+
+## [0.9.0] · 2026-05-04
+
+Cierra Sprint A (#44 Tipo de dúo end-to-end). El toggle Interanual
+ahora cubre toda la app: Foto, Película, Tasas, Calidad de la muestra,
+y descargas en sección Datos.
+
+### Added
+
+- **Calidad de la muestra en modo Interanual** (#47 Fase 3). El
+  módulo `mod_calidad_panel` recibe `tipo_duo` y muestra los KPIs +
+  charts del panel correspondiente. Selector "duplas" se adapta:
+  `1→2 / 2→3 / 3→4 / 4→1` (intertrim) ↔ `T1 vs T1 / T2 vs T2 / ... / T4 vs T4` (anual).
+- Nuevo dataset `data_output/calidad_panel_anual_pct_historico.csv`
+  generado por `regenerar_calidad_panel(window = "anual")`.
+- **Tarjeta de descarga del dataset anual** en sección Datos. Al lado
+  de "Panel longitudinal · intertrimestral" aparece "Panel
+  longitudinal · interanual" con dropdown Parquet/CSV gzip
+  (16 MB / 18 MB).
+
+### Changed
+
+- `regenerar_calidad_panel()` acepta parámetro `window`. En anual los
+  dúos van T_n año X → T_n año X+1, periodo formato `YYYY_tN`. La
+  detección de inconsistencia de edad usa rango `[CH06, CH06 + 2]` en
+  anual (vs +1 en trimestral) para reflejar que entre dos años
+  consecutivos la persona pudo haber cumplido años.
+- `duo_label()` acepta `window`: en anual devuelve `"tN"` en lugar
+  de `"tN-tM"`.
+
+---
+
 ## [0.8.1] · 2026-05-04
 
 ### Fixed

diff --git a/ETL/01-extract.R b/ETL/01-extract.R
@@ -113,19 +113,23 @@ df_tasas_formalidad_amp_anual  <- cargar_tasas_csv("data_output/tasas_formalidad
 ### ETL/10-build_calidad_panel.R y mantenido al día por 03-update_data.R.
 ### Schema: periodo, anio_0, trim_0, anio_1, trim_1, n_t0, pondera_t0,
 ### n_panel, pondera_panel, pct_encontrado_n, pct_encontrado_pondera.
-path_calidad <- "data_output/calidad_panel_pct_historico.csv"
-df_calidad_panel <- if (file.exists(path_calidad)) {
-  arrow::read_csv_arrow(path_calidad) |>
-    dplyr::collect() |>
-    dplyr::arrange(anio_0, trim_0)
-} else {
-  tibble::tibble(periodo = character(), anio_0 = integer(), trim_0 = integer(),
-                 anio_1 = integer(), trim_1 = integer(),
-                 n_t0 = integer(), pondera_t0 = double(),
-                 n_panel = integer(), pondera_panel = double(),
-                 pct_encontrado_n = double(), pct_encontrado_pondera = double())
+cargar_calidad_csv <- function(path) {
+  ### Missing CSV: return an empty tibble that still has the full schema.
+  if (!file.exists(path)) {
+    return(tibble::tibble(
+      periodo = character(), anio_0 = integer(), trim_0 = integer(),
+      anio_1 = integer(), trim_1 = integer(), n_t0 = integer(),
+      pondera_t0 = double(), n_panel = integer(), pondera_panel = double(),
+      pct_encontrado_n = double(), pct_encontrado_pondera = double()
+    ))
+  }
+  ### CSV present: read it and order the rows by (anio_0, trim_0).
+  datos <- dplyr::collect(arrow::read_csv_arrow(path))
+  dplyr::arrange(datos, anio_0, trim_0)
+}
-rm(path_calidad)
+df_calidad_panel       <- cargar_calidad_csv("data_output/calidad_panel_pct_historico.csv")
+### ANNUAL version (issue #47). Generated by ETL/11-build_historicos_anuales.R.
+df_calidad_panel_anual <- cargar_calidad_csv("data_output/calidad_panel_anual_pct_historico.csv")
 
 ### Rango de períodos disponibles (insumo para los selectInput dinámicos).
 ### Se deriva del panel_runtime: cualquier (anio_0, trim_0) es un trimestre

diff --git a/ETL/11-build_historicos_anuales.R b/ETL/11-build_historicos_anuales.R
@@ -131,4 +131,18 @@ readr::write_csv(tasas_formalidad_amp_anual,
                  "data_output/tasas_formalidad_ampliada_anual_historico.csv")
 cat(glue::glue("  tasas_formalidad_ampliada_anual_historico.csv OK ({nrow(tasas_formalidad_amp_anual)} filas)\n\n"))
 
+
+### --------------------------------------------------------------------
+### Panel quality, annual window (issue #47)
+### --------------------------------------------------------------------
+
+cat("--- Calidad del panel anual ---\n\n")
+
+regenerar_calidad_panel(
+  path_csv     = "data_output/calidad_panel_anual_pct_historico.csv",
+  df_microdato = df_eph_full,
+  window       = "anual"  # pair each quarter with the same quarter one year later
+)
+
+
 cat("=== Pre-cómputo de históricos anuales completo ===\n")
diff --git a/ETL/99-functions.R b/ETL/99-functions.R
@@ -506,7 +506,8 @@ armo_tabla_sankey <- function(table, categoria){
 ###   periodo, anio_0, trim_0, anio_1, trim_1,
 ###   n_t0, pondera_t0, n_panel, pondera_panel,
 ###   pct_encontrado_n, pct_encontrado_pondera
-regenerar_calidad_panel <- function(path_csv, df_microdato) {
+regenerar_calidad_panel <- function(path_csv, df_microdato,
+                                    window = "trimestral") {
 
   hist_existente <- if (file.exists(path_csv)) {
     readr::read_csv(path_csv, show_col_types = FALSE)
@@ -520,18 +521,33 @@ regenerar_calidad_panel <- function(path_csv, df_microdato) {
     character(0)
   }
 
-  ### Mismo cómputo de dúos válidos que regenerar_panel_historico().
-  duos_posibles <- df_microdato |>
-    dplyr::distinct(ANO4, TRIMESTRE) |>
-    dplyr::arrange(ANO4, TRIMESTRE) |>
-    dplyr::mutate(
-      anio_post  = dplyr::if_else(TRIMESTRE %in% 1:3, ANO4, ANO4 + 1L),
-      trim_post  = dplyr::if_else(TRIMESTRE %in% 1:3, TRIMESTRE + 1L, 1L),
-      tiene_post = paste(anio_post, trim_post) %in%
-        paste(df_microdato$ANO4, df_microdato$TRIMESTRE)
-    ) |>
-    dplyr::filter(tiene_post) |>
-    dplyr::mutate(periodo = glue::glue("{ANO4}_t{TRIMESTRE}-t{trim_post}"))
+  ### Cómputo de dúos válidos según window (issue #47).
+  ### Mismo patrón que regenerar_panel_historico y build_tasas_historico.
+  duos_posibles <- if (window == "anual") {
+    df_microdato |>
+      dplyr::distinct(ANO4, TRIMESTRE) |>
+      dplyr::arrange(ANO4, TRIMESTRE) |>
+      dplyr::mutate(
+        anio_post  = ANO4 + 1L,
+        trim_post  = TRIMESTRE,
+        tiene_post = paste(anio_post, trim_post) %in%
+          paste(df_microdato$ANO4, df_microdato$TRIMESTRE)
+      ) |>
+      dplyr::filter(tiene_post) |>
+      dplyr::mutate(periodo = glue::glue("{ANO4}_t{TRIMESTRE}"))
+  } else {
+    df_microdato |>
+      dplyr::distinct(ANO4, TRIMESTRE) |>
+      dplyr::arrange(ANO4, TRIMESTRE) |>
+      dplyr::mutate(
+        anio_post  = dplyr::if_else(TRIMESTRE %in% 1:3, ANO4, ANO4 + 1L),
+        trim_post  = dplyr::if_else(TRIMESTRE %in% 1:3, TRIMESTRE + 1L, 1L),
+        tiene_post = paste(anio_post, trim_post) %in%
+          paste(df_microdato$ANO4, df_microdato$TRIMESTRE)
+      ) |>
+      dplyr::filter(tiene_post) |>
+      dplyr::mutate(periodo = glue::glue("{ANO4}_t{TRIMESTRE}-t{trim_post}"))
+  }
 
   duos_a_calcular <- duos_posibles |>
     dplyr::filter(!periodo %in% periodos_existentes)
@@ -557,22 +573,26 @@ regenerar_calidad_panel <- function(path_csv, df_microdato) {
         anio_0      = ANO4, trimestre_0 = TRIMESTRE,
         anio_1      = anio_post, trimestre_1 = trim_post,
         df          = df_microdato,
-        variables   = c("ESTADO", "PONDERA", "CH04", "CH06")
+        variables   = c("ESTADO", "PONDERA", "CH04", "CH06"),
+        window      = window
       ) |>
         dplyr::filter(ESTADO %in% 1:4)
 
       ### Detección de inconsistencias específicas:
       ###   - sexo:  CH04 t0 ≠ CH04 t1 (debe ser invariante).
-      ###   - edad:  CH06_t1 fuera del rango [CH06, CH06 + 1] (en un panel
-      ###            de 1 trimestre, la edad sube como mucho 1 año).
+      ###   - edad:  CH06_t1 fuera del rango esperado.
+      ###     * trimestral: [CH06, CH06 + 1] (1 trim → max +1 año).
+      ###     * anual: [CH06, CH06 + 2] (1 año → max +1, +2 si cumplió
+      ###       años en el medio del año móvil).
       ### Una persona puede tener ambas inconsistencias a la vez; la
       ### "total" es el flag de eph::organize_panels (más amplio: incluye
       ### otras cosas como saltos en NIVEL_ED si estuvieran).
+      max_delta_edad <- if (window == "anual") 2L else 1L
       panel_inc <- panel |>
         dplyr::mutate(
           inc_sexo = !is.na(CH04) & !is.na(CH04_t1) & CH04 != CH04_t1,
           inc_edad = !is.na(CH06) & !is.na(CH06_t1) &
-                     (CH06_t1 < CH06 | CH06_t1 > CH06 + 1L),
+                     (CH06_t1 < CH06 | CH06_t1 > CH06 + max_delta_edad),
           inc_total = !consistencia
         )